chore: server code formatting

This commit is contained in:
Rafael Taranto 2025-05-12 15:35:00 +01:00
parent aedabcbdee
commit 68517170e2
234 changed files with 9824 additions and 6195 deletions

View file

@ -10,7 +10,7 @@ const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
let structs = null
function load () {
function load() {
if (!OFAC_DATA_DIR) {
const message = 'The ofacDataDir option has not been set in the environment'
return Promise.reject(new Error(message))
@ -19,10 +19,12 @@ function load () {
const ofacSourcesDir = path.join(OFAC_DATA_DIR, 'sources')
return readdir(ofacSourcesDir)
.then(_.flow(
_.map(file => path.join(ofacSourcesDir, file)),
loader.load
))
.then(
_.flow(
_.map(file => path.join(ofacSourcesDir, file)),
loader.load,
),
)
.then(result => {
return (structs = result)
})
@ -30,14 +32,14 @@ function load () {
// nameParts should be an object like {firstName: "John", lastName: "Doe", ...}
function makeCompatible (nameParts) {
function makeCompatible(nameParts) {
const partNames = _.keys(nameParts)
const values = _.map(_.lowerCase, _.values(nameParts))
const props = _.zipAll([partNames, values])
return _.map(_.zipObject(['partName', 'value']), props)
}
function match (nameParts, birthDateString, options) {
function match(nameParts, birthDateString, options) {
if (!structs) {
logger.error(new Error('The OFAC data sources have not been loaded yet.'))
return false
@ -50,24 +52,27 @@ function match (nameParts, birthDateString, options) {
// birthDateString is in YYYYMMDD format
const birthDate = _.cond([
[_.identity, () => {
const year = parseInt(birthDateString.slice(0, 4))
const month = parseInt(birthDateString.slice(4, 6))
const day = parseInt(birthDateString.slice(6, 8))
const date = new Date(year, month - 1, day)
[
_.identity,
() => {
const year = parseInt(birthDateString.slice(0, 4))
const month = parseInt(birthDateString.slice(4, 6))
const day = parseInt(birthDateString.slice(6, 8))
const date = new Date(year, month - 1, day)
return {year, month, day, date}
}],
[_.stubTrue, () => null]
return { year, month, day, date }
},
],
[_.stubTrue, () => null],
])(birthDateString)
const candidate = {parts, fullName, words, birthDate}
const candidate = { parts, fullName, words, birthDate }
const result = matcher.match(structs, candidate, options)
return result
}
function getStructs () {
function getStructs() {
return structs
}
module.exports = {load, match, getStructs}
module.exports = { load, match, getStructs }

View file

@ -2,20 +2,18 @@ const fs = require('fs')
const ndjson = require('ndjson')
const _ = require('lodash/fp')
const mapAliases = _.curry((iteratee, individuals) => {
const mapIndividual = individual => {
const {id, aliases} = individual
const { id, aliases } = individual
return _.map(alias => iteratee(id, alias), aliases)
}
return _.flatMap(mapIndividual, individuals)
})
const getPhoneticEntries = (individualId, alias) => {
const pairPhoneticsWithValues = word => {
const {value, phonetics} = word
const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
const { value, phonetics } = word
const makeEntry = phonetic => ({ value, phonetic, aliasId: alias.id })
return _.map(makeEntry, phonetics)
}
return _.flatMap(pairPhoneticsWithValues, alias.words)
@ -25,17 +23,13 @@ const producePhoneticMap = _.flow(
mapAliases(getPhoneticEntries),
_.flatten,
_.groupBy(_.get('phonetic')),
_.mapValues(_.flow(
_.map(_.get('aliasId')),
_.uniq
)),
_.mapValues(_.flow(_.map(_.get('aliasId')), _.uniq)),
_.toPairs,
entries => new Map(entries)
entries => new Map(entries),
)
const getWords = (individualId, alias) => {
const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
const pairWordsWithIds = word => ({ value: word.value, aliasId: alias.id })
return _.map(pairWordsWithIds, alias.words)
}
@ -45,7 +39,7 @@ const produceWordList = _.flow(
_.groupBy(_.get('value')),
_.mapValues(_.map(_.get('aliasId'))),
_.toPairs,
_.map(_.zipObject(['value', 'aliasIds']))
_.map(_.zipObject(['value', 'aliasIds'])),
)
const parseSource = source => {
@ -55,10 +49,13 @@ const parseSource = source => {
const jsonStream = readStream.pipe(ndjson.parse())
jsonStream.on('data', individual => {
_.forEach(period => {
_.forEach(date => {
const {year, month, day} = date
date.date = new Date(year, month - 1, day)
}, [period.start, period.end])
_.forEach(
date => {
const { year, month, day } = date
date.date = new Date(year, month - 1, day)
},
[period.start, period.end],
)
}, individual.birthDatePeriods)
individuals.push(individual)
})
@ -71,18 +68,14 @@ const parseSource = source => {
})
}
const load = sources => Promise.all(_.map(parseSource, sources))
.then(_.flow(
_.flatten,
_.compact,
_.uniqBy(_.get('id')),
individuals => {
const load = sources =>
Promise.all(_.map(parseSource, sources)).then(
_.flow(_.flatten, _.compact, _.uniqBy(_.get('id')), individuals => {
const individualsMap = _.flow(
_.groupBy(_.get('id')),
_.mapValues(_.first),
_.toPairs,
entries => new Map(entries)
entries => new Map(entries),
)(individuals)
const makeEntries = (individualId, alias) => [alias.id, alias]
@ -101,9 +94,9 @@ const load = sources => Promise.all(_.map(parseSource, sources))
aliasesMap,
aliasToIndividual,
phoneticMap,
wordList
wordList,
}
}
))
}),
)
module.exports = {load}
module.exports = { load }

View file

@ -7,7 +7,7 @@ const stringSimilarity = _.curry(jaro)
// birth date
function isDateWithinSomeDaysOfPeriod (period, date, days) {
function isDateWithinSomeDaysOfPeriod(period, date, days) {
const inMillisecs = 24 * 60 * 60 * 1000
const startTime = period.start.date.getTime() - days * inMillisecs
@ -16,32 +16,35 @@ function isDateWithinSomeDaysOfPeriod (period, date, days) {
const endTime = period.end.date.getTime() + days * inMillisecs
const endDate = new Date(endTime)
return (startDate < date && date < endDate)
return startDate < date && date < endDate
}
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
if (!dateObject) return false
if (_.isEmpty(individual.birthDatePeriods)) return false
const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days])
const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [
dateObject.date,
days,
])
return !_.some(isWithinSomeYears, individual.birthDatePeriods)
})
// algorithm
function match (structs, candidate, options) {
const {threshold, fullNameThreshold, ratio = 0.5, verboseFor} = options
const {fullName, words, birthDate} = candidate
function match(structs, candidate, options) {
const { threshold, fullNameThreshold, ratio = 0.5, verboseFor } = options
const { fullName, words, birthDate } = candidate
// Accept aliases whose full name matches.
const doesNameMatch = _.flow(
_.get('fullName'),
stringSimilarity(fullName),
_.lte(fullNameThreshold)
_.lte(fullNameThreshold),
)
const aliases = _.flatMap(_.get('aliases'), structs.individuals)
const aliasIdsFromFullName = _.flow(
_.filter(doesNameMatch),
_.map(_.get('id'))
_.map(_.get('id')),
)(aliases)
const phoneticWeight = ratio
@ -60,12 +63,25 @@ function match (structs, candidate, options) {
for (const aliasId of wordEntry.aliasIds) {
const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1
const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
const finalScore =
stringWeight * stringScore + phoneticWeight * phoneticScore
verbose && logger.debug(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value)
verbose &&
logger.debug(
finalScore.toFixed(2),
stringScore.toFixed(2),
phoneticScore.toFixed(2),
word.value,
wordEntry.value,
)
if (finalScore >= threshold) {
const entry = {aliasId, score: finalScore, word: word.value, value: wordEntry.value}
const entry = {
aliasId,
score: finalScore,
word: word.value,
value: wordEntry.value,
}
const index = _.sortedIndexBy(x => -x.score, entry, matches)
matches.splice(index, 0, entry)
}
@ -83,10 +99,10 @@ function match (structs, candidate, options) {
_.countBy(_.identity),
_.toPairs,
_.filter(([aliasId, count]) => {
const {length} = structs.aliasesMap.get(aliasId).words
return (count >= _.min([2, words.length, length]))
const { length } = structs.aliasesMap.get(aliasId).words
return count >= _.min([2, words.length, length])
}),
_.map(_.first)
_.map(_.first),
)(matches)
// Get the full record for each matched id
@ -94,10 +110,9 @@ function match (structs, candidate, options) {
const individualId = structs.aliasToIndividual.get(aliasId)
return structs.individualsMap.get(individualId)
}
const suspects = _.uniq(_.map(getIndividual, [
...aliasIdsFromFullName,
...aliasIdsFromNamePart
]))
const suspects = _.uniq(
_.map(getIndividual, [...aliasIdsFromFullName, ...aliasIdsFromNamePart]),
)
// Reject everyone whose birth date is more than two years away.
const twoYears = 365 * 2
@ -105,4 +120,4 @@ function match (structs, candidate, options) {
return _.reject(unqualified, suspects)
}
module.exports = {match}
module.exports = { match }

View file

@ -5,17 +5,24 @@ const makePhonetic = _.flow(doubleMetaphone, _.uniq)
// Combine name-parts in a standard order.
const partOrdering = ['firstName', 'middleName', 'maidenName', 'patronymic', 'matronymic', 'lastName']
const partOrdering = [
'firstName',
'middleName',
'maidenName',
'patronymic',
'matronymic',
'lastName',
]
const usingPartOrder = _.flow(
_.get('partName'),
_.partialRight(_.indexOf, [partOrdering])
_.partialRight(_.indexOf, [partOrdering]),
)
const makeFullName = _.flow(
_.sortBy(usingPartOrder),
_.map(_.get('value')),
_.join(' ')
_.join(' '),
)
const makeWords = value => {
@ -27,5 +34,5 @@ const makeWords = value => {
module.exports = {
makeFullName,
makeWords
makeWords,
}

View file

@ -25,7 +25,7 @@ const partNames = new Map([
[MAIDEN_NAME, 'maidenName'],
[PATRONYMIC, 'patronymic'],
[MATRONYMIC, 'matronymic'],
[NICKNAME, 'nickname']
[NICKNAME, 'nickname'],
])
const filteredWords = [
@ -34,7 +34,7 @@ const filteredWords = [
// group-id to type-id
function processMasterNamePartGroup (groupNode) {
function processMasterNamePartGroup(groupNode) {
const namePartGroupNode = groupNode.NamePartGroup
const groupId = namePartGroupNode.$.ID
const typeId = namePartGroupNode.$.NamePartTypeID
@ -47,7 +47,7 @@ const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => {
const typeId = groupTypes.get(groupId)
const partName = partNames.get(typeId)
const value = _.lowerCase(valueNode.$text)
return {partName, value}
return { partName, value }
})
const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN)
@ -72,29 +72,26 @@ const processAlias = _.curry((groupTypes, aliasNode) => {
const fullName = nameUtils.makeFullName(parts)
const words = _.flow(
nameUtils.makeWords,
_.reject(_.flow(
_.get('value'),
word => filteredWords.includes(word)
))
_.reject(_.flow(_.get('value'), word => filteredWords.includes(word))),
)(fullName)
// if (words.length < 2) {
// console.log(JSON.stringify(words))
// }
return {id, parts, fullName, words}
return { id, parts, fullName, words }
})
// birth date
function processDate (dateNode) {
function processDate(dateNode) {
const year = parseInt(dateNode.Year)
const month = parseInt(dateNode.Month)
const day = parseInt(dateNode.Day)
return {year, month, day}
return { year, month, day }
}
function processFeature (featureNode) {
function processFeature(featureNode) {
if (featureNode.$.FeatureTypeID !== BIRTH_DATE) return
const datePeriodNode = featureNode.FeatureVersion.DatePeriod
@ -103,7 +100,7 @@ function processFeature (featureNode) {
// By using Start.From and End.To we use the extremes of the date-period.
const period = {
start: datePeriodNode.Start.From,
end: datePeriodNode.End.To
end: datePeriodNode.End.To,
}
return _.mapValues(processDate, period)
@ -111,13 +108,16 @@ function processFeature (featureNode) {
// profile
function processProfile (profileNode) {
function processProfile(profileNode) {
if (profileNode.$.PartySubTypeID !== INDIVIDUAL) return
const id = profileNode.$.ID
const identityNode = profileNode.Identity
const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup)
const groupTypesEntries = _.map(
processMasterNamePartGroup,
identityNode.NamePartGroups.MasterNamePartGroup,
)
const groupTypes = new Map(groupTypesEntries)
const mapCompact = _.flow(_.map, _.compact)
@ -128,7 +128,7 @@ function processProfile (profileNode) {
if (_.isEmpty(aliases)) return
const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
const individual = {id, aliases, birthDatePeriods}
const individual = { id, aliases, birthDatePeriods }
return individual
}
@ -158,4 +158,4 @@ const parse = (source, callback) => {
})
}
module.exports = {parse}
module.exports = { parse }

View file

@ -1,7 +1,14 @@
const parser = require('./parsing')
const axios = require('axios')
const { createWriteStream } = require('fs')
const { rename, writeFile, readFile, mkdir, copyFile, unlink } = require('fs/promises')
const {
rename,
writeFile,
readFile,
mkdir,
copyFile,
unlink,
} = require('fs/promises')
const path = require('path')
const _ = require('lodash/fp')
@ -10,17 +17,21 @@ const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR, 'sources')
const LAST_UPDATED_FILE = path.resolve(OFAC_DATA_DIR, 'last_updated.dat')
const OFAC_SOURCES = [{
name: 'sdn_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/sdn_advanced.xml'
}, {
name: 'cons_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/cons_advanced.xml'
}]
const OFAC_SOURCES = [
{
name: 'sdn_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/sdn_advanced.xml',
},
{
name: 'cons_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/cons_advanced.xml',
},
]
const _mkdir = path =>
mkdir(path)
.catch(err => err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err))
mkdir(path).catch(err =>
err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err),
)
const download = (dstDir, { name, url }) => {
const dstFile = path.join(dstDir, name + '.xml')
@ -90,7 +101,7 @@ const moveToSourcesDir = async (srcFile, ofacSourcesDir) => {
return dstFile
}
function update () {
function update() {
if (!OFAC_DATA_DIR) {
throw new Error('ofacDataDir must be defined in the environment')
}
@ -118,17 +129,20 @@ function update () {
if (skipUpdate) return Promise.resolve()
const downloads = _.flow(
_.map(file => download(DOWNLOAD_DIR, file).then(parseToJson))
_.map(file => download(DOWNLOAD_DIR, file).then(parseToJson)),
)(OFAC_SOURCES)
return Promise.all(downloads)
.then(parsed => {
const moves = _.map(src => moveToSourcesDir(src, OFAC_SOURCES_DIR), parsed)
const timestamp = new Date().toISOString()
return Promise.all(downloads).then(parsed => {
const moves = _.map(
src => moveToSourcesDir(src, OFAC_SOURCES_DIR),
parsed,
)
const timestamp = new Date().toISOString()
return Promise.all([...moves])
.then(() => writeFile(LAST_UPDATED_FILE, timestamp))
})
return Promise.all([...moves]).then(() =>
writeFile(LAST_UPDATED_FILE, timestamp),
)
})
})
}