chore: server code formatting
This commit is contained in:
parent
aedabcbdee
commit
68517170e2
234 changed files with 9824 additions and 6195 deletions
@@ -10,7 +10,7 @@ const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
 
 let structs = null
 
-function load () {
+function load() {
   if (!OFAC_DATA_DIR) {
     const message = 'The ofacDataDir option has not been set in the environment'
     return Promise.reject(new Error(message))
@@ -19,10 +19,12 @@ function load () {
   const ofacSourcesDir = path.join(OFAC_DATA_DIR, 'sources')
 
   return readdir(ofacSourcesDir)
-    .then(_.flow(
-      _.map(file => path.join(ofacSourcesDir, file)),
-      loader.load
-    ))
+    .then(
+      _.flow(
+        _.map(file => path.join(ofacSourcesDir, file)),
+        loader.load,
+      ),
+    )
     .then(result => {
       return (structs = result)
     })
@@ -30,14 +32,14 @@ function load () {
 
 // nameParts should be an object like {firstName: "John", lastName: "Doe", ...}
 
-function makeCompatible (nameParts) {
+function makeCompatible(nameParts) {
   const partNames = _.keys(nameParts)
   const values = _.map(_.lowerCase, _.values(nameParts))
   const props = _.zipAll([partNames, values])
   return _.map(_.zipObject(['partName', 'value']), props)
 }
 
-function match (nameParts, birthDateString, options) {
+function match(nameParts, birthDateString, options) {
   if (!structs) {
     logger.error(new Error('The OFAC data sources have not been loaded yet.'))
     return false
@@ -50,24 +52,27 @@ function match (nameParts, birthDateString, options) {
 
   // birthDateString is in YYYYMMDD format
   const birthDate = _.cond([
-    [_.identity, () => {
-      const year = parseInt(birthDateString.slice(0, 4))
-      const month = parseInt(birthDateString.slice(4, 6))
-      const day = parseInt(birthDateString.slice(6, 8))
-      const date = new Date(year, month - 1, day)
+    [
+      _.identity,
+      () => {
+        const year = parseInt(birthDateString.slice(0, 4))
+        const month = parseInt(birthDateString.slice(4, 6))
+        const day = parseInt(birthDateString.slice(6, 8))
+        const date = new Date(year, month - 1, day)
 
-      return {year, month, day, date}
-    }],
-    [_.stubTrue, () => null]
+        return { year, month, day, date }
+      },
+    ],
+    [_.stubTrue, () => null],
   ])(birthDateString)
 
-  const candidate = {parts, fullName, words, birthDate}
+  const candidate = { parts, fullName, words, birthDate }
   const result = matcher.match(structs, candidate, options)
   return result
 }
 
-function getStructs () {
+function getStructs() {
   return structs
 }
 
-module.exports = {load, match, getStructs}
+module.exports = { load, match, getStructs }
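For context while reading the hunks above: this module exposes a three-function API. A minimal usage sketch follows; the option names (`threshold`, `fullNameThreshold`, `ratio`) appear in the diff, but the require path and all sample values are assumptions.

```js
// Hypothetical consumer of the module diffed above; require path and sample
// values are invented for illustration.
const ofac = require('./ofac')

ofac.load().then(() => {
  // nameParts is an object like { firstName: 'John', lastName: 'Doe' } and
  // birthDateString is in YYYYMMDD format, per the comments in the diff.
  const result = ofac.match({ firstName: 'john', lastName: 'doe' }, '19600115', {
    threshold: 0.85, // per-word score cutoff (assumed value)
    fullNameThreshold: 0.95, // full-name similarity cutoff (assumed value)
    ratio: 0.5, // phonetic vs. string weighting; defaults to 0.5 in the diff
  })
  console.log(result) // matched individuals, or false if structs never loaded
})
```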
@@ -2,20 +2,18 @@ const fs = require('fs')
 const ndjson = require('ndjson')
 const _ = require('lodash/fp')
 
-
 const mapAliases = _.curry((iteratee, individuals) => {
   const mapIndividual = individual => {
-    const {id, aliases} = individual
+    const { id, aliases } = individual
     return _.map(alias => iteratee(id, alias), aliases)
   }
   return _.flatMap(mapIndividual, individuals)
 })
 
-
 const getPhoneticEntries = (individualId, alias) => {
   const pairPhoneticsWithValues = word => {
-    const {value, phonetics} = word
-    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
+    const { value, phonetics } = word
+    const makeEntry = phonetic => ({ value, phonetic, aliasId: alias.id })
     return _.map(makeEntry, phonetics)
   }
   return _.flatMap(pairPhoneticsWithValues, alias.words)
@@ -25,17 +23,13 @@ const producePhoneticMap = _.flow(
   mapAliases(getPhoneticEntries),
   _.flatten,
   _.groupBy(_.get('phonetic')),
-  _.mapValues(_.flow(
-    _.map(_.get('aliasId')),
-    _.uniq
-  )),
+  _.mapValues(_.flow(_.map(_.get('aliasId')), _.uniq)),
   _.toPairs,
-  entries => new Map(entries)
+  entries => new Map(entries),
 )
 
-
 const getWords = (individualId, alias) => {
-  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
+  const pairWordsWithIds = word => ({ value: word.value, aliasId: alias.id })
   return _.map(pairWordsWithIds, alias.words)
 }
 
@@ -45,7 +39,7 @@ const produceWordList = _.flow(
   _.groupBy(_.get('value')),
   _.mapValues(_.map(_.get('aliasId'))),
   _.toPairs,
-  _.map(_.zipObject(['value', 'aliasIds']))
+  _.map(_.zipObject(['value', 'aliasIds'])),
 )
 
 const parseSource = source => {
@@ -55,10 +49,13 @@ const parseSource = source => {
   const jsonStream = readStream.pipe(ndjson.parse())
   jsonStream.on('data', individual => {
     _.forEach(period => {
-      _.forEach(date => {
-        const {year, month, day} = date
-        date.date = new Date(year, month - 1, day)
-      }, [period.start, period.end])
+      _.forEach(
+        date => {
+          const { year, month, day } = date
+          date.date = new Date(year, month - 1, day)
+        },
+        [period.start, period.end],
+      )
     }, individual.birthDatePeriods)
     individuals.push(individual)
   })
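A quick aside on the `new Date(year, month - 1, day)` calls being reflowed here: JavaScript `Date` months are 0-based, hence the `- 1`. For instance:

```js
// JS Date months are 0-based: 7 - 1 = 6 selects July.
const d = new Date(1960, 7 - 1, 20)
console.log(d.getMonth() + 1) // 7
console.log(d.toDateString()) // 'Wed Jul 20 1960'
```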
@@ -71,18 +68,14 @@ const parseSource = source => {
   })
 }
 
-const load = sources => Promise.all(_.map(parseSource, sources))
-  .then(_.flow(
-    _.flatten,
-    _.compact,
-    _.uniqBy(_.get('id')),
-    individuals => {
-
+const load = sources =>
+  Promise.all(_.map(parseSource, sources)).then(
+    _.flow(_.flatten, _.compact, _.uniqBy(_.get('id')), individuals => {
       const individualsMap = _.flow(
         _.groupBy(_.get('id')),
         _.mapValues(_.first),
         _.toPairs,
-        entries => new Map(entries)
+        entries => new Map(entries),
       )(individuals)
 
       const makeEntries = (individualId, alias) => [alias.id, alias]
@@ -101,9 +94,9 @@ const load = sources => Promise.all(_.map(parseSource, sources))
         aliasesMap,
         aliasToIndividual,
         phoneticMap,
-        wordList
+        wordList,
       }
-    }
-  ))
+    }),
+  )
 
-module.exports = {load}
+module.exports = { load }
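For readers following the reshaped pipeline: `producePhoneticMap` groups phonetic entries by code and deduplicates the alias ids. A runnable sketch of that grouping, on hand-made entries (the real ones come from `mapAliases(getPhoneticEntries)`):

```js
const _ = require('lodash/fp')

// Hand-made entries standing in for mapAliases(getPhoneticEntries) output.
const entries = [
  { value: 'john', phonetic: 'JN', aliasId: 'a1' },
  { value: 'jon', phonetic: 'JN', aliasId: 'a2' },
  { value: 'doe', phonetic: 'T', aliasId: 'a1' },
]

// Same flow as producePhoneticMap after the reformat, minus the mapAliases
// and _.flatten steps, which the flat fake entries above make unnecessary.
const phoneticMap = _.flow(
  _.groupBy(_.get('phonetic')),
  _.mapValues(_.flow(_.map(_.get('aliasId')), _.uniq)),
  _.toPairs,
  pairs => new Map(pairs),
)(entries)

console.log(phoneticMap.get('JN')) // [ 'a1', 'a2' ]
```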
@@ -7,7 +7,7 @@ const stringSimilarity = _.curry(jaro)
 
 // birth date
 
-function isDateWithinSomeDaysOfPeriod (period, date, days) {
+function isDateWithinSomeDaysOfPeriod(period, date, days) {
   const inMillisecs = 24 * 60 * 60 * 1000
 
   const startTime = period.start.date.getTime() - days * inMillisecs
@@ -16,32 +16,35 @@ function isDateWithinSomeDaysOfPeriod (period, date, days) {
   const endTime = period.end.date.getTime() + days * inMillisecs
   const endDate = new Date(endTime)
 
-  return (startDate < date && date < endDate)
+  return startDate < date && date < endDate
 }
 
 const isBornTooLongSince = _.curry((days, dateObject, individual) => {
   if (!dateObject) return false
   if (_.isEmpty(individual.birthDatePeriods)) return false
-  const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days])
+  const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [
+    dateObject.date,
+    days,
+  ])
   return !_.some(isWithinSomeYears, individual.birthDatePeriods)
 })
 
 // algorithm
 
-function match (structs, candidate, options) {
-  const {threshold, fullNameThreshold, ratio = 0.5, verboseFor} = options
-  const {fullName, words, birthDate} = candidate
+function match(structs, candidate, options) {
+  const { threshold, fullNameThreshold, ratio = 0.5, verboseFor } = options
+  const { fullName, words, birthDate } = candidate
 
   // Accept aliases who's full name matches.
   const doesNameMatch = _.flow(
     _.get('fullName'),
     stringSimilarity(fullName),
-    _.lte(fullNameThreshold)
+    _.lte(fullNameThreshold),
   )
   const aliases = _.flatMap(_.get('aliases'), structs.individuals)
   const aliasIdsFromFullName = _.flow(
     _.filter(doesNameMatch),
-    _.map(_.get('id'))
+    _.map(_.get('id')),
   )(aliases)
 
   const phoneticWeight = ratio
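To make the reflowed `isBornTooLongSince` concrete: a candidate date counts as compatible when it falls within `days` of the period's extremes. A worked sketch with invented dates (`days` matches the `twoYears` constant used at the end of this file):

```js
const inMillisecs = 24 * 60 * 60 * 1000
const days = 365 * 2 // the twoYears tolerance from the last hunk of this file

// Invented birth-date period and candidate date.
const period = {
  start: { date: new Date(1960, 0, 1) },
  end: { date: new Date(1960, 11, 31) },
}
const candidate = new Date(1961, 5, 15)

// Body of isDateWithinSomeDaysOfPeriod, inlined.
const startDate = new Date(period.start.date.getTime() - days * inMillisecs)
const endDate = new Date(period.end.date.getTime() + days * inMillisecs)
console.log(startDate < candidate && candidate < endDate) // true
```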
@@ -60,12 +63,25 @@ function match (structs, candidate, options) {
 
     for (const aliasId of wordEntry.aliasIds) {
       const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1
-      const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
+      const finalScore =
+        stringWeight * stringScore + phoneticWeight * phoneticScore
 
-      verbose && logger.debug(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value)
+      verbose &&
+        logger.debug(
+          finalScore.toFixed(2),
+          stringScore.toFixed(2),
+          phoneticScore.toFixed(2),
+          word.value,
+          wordEntry.value,
+        )
 
       if (finalScore >= threshold) {
-        const entry = {aliasId, score: finalScore, word: word.value, value: wordEntry.value}
+        const entry = {
+          aliasId,
+          score: finalScore,
+          word: word.value,
+          value: wordEntry.value,
+        }
         const index = _.sortedIndexBy(x => -x.score, entry, matches)
         matches.splice(index, 0, entry)
       }
@@ -83,10 +99,10 @@ function match (structs, candidate, options) {
     _.countBy(_.identity),
     _.toPairs,
     _.filter(([aliasId, count]) => {
-      const {length} = structs.aliasesMap.get(aliasId).words
-      return (count >= _.min([2, words.length, length]))
+      const { length } = structs.aliasesMap.get(aliasId).words
+      return count >= _.min([2, words.length, length])
     }),
-    _.map(_.first)
+    _.map(_.first),
   )(matches)
 
   // Get the full record for each matched id
@@ -94,10 +110,9 @@ function match (structs, candidate, options) {
     const individualId = structs.aliasToIndividual.get(aliasId)
     return structs.individualsMap.get(individualId)
   }
-  const suspects = _.uniq(_.map(getIndividual, [
-    ...aliasIdsFromFullName,
-    ...aliasIdsFromNamePart
-  ]))
+  const suspects = _.uniq(
+    _.map(getIndividual, [...aliasIdsFromFullName, ...aliasIdsFromNamePart]),
+  )
 
   // Reject everyone who is born two years away.
   const twoYears = 365 * 2
@@ -105,4 +120,4 @@ function match (structs, candidate, options) {
   return _.reject(unqualified, suspects)
 }
 
-module.exports = {match}
+module.exports = { match }
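A worked instance of the score that the reflowed `finalScore` expression computes. The diff shows `phoneticWeight = ratio`; the complementary `stringWeight = 1 - ratio` is an assumption, since that line falls outside the hunks shown here.

```js
const ratio = 0.5 // options.ratio default, per the destructuring above
const phoneticWeight = ratio
const stringWeight = 1 - phoneticWeight // assumed; not visible in these hunks

const stringScore = 0.9 // sample Jaro similarity of candidate and alias word
const phoneticScore = 1 // 1 when the alias shares a phonetic code, else -1

const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
console.log(finalScore.toFixed(2)) // '0.95', compared against options.threshold
```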
@@ -5,17 +5,24 @@ const makePhonetic = _.flow(doubleMetaphone, _.uniq)
 
 // Combine name-parts in a standard order.
 
-const partOrdering = ['firstName', 'middleName', 'maidenName', 'patronymic', 'matronymic', 'lastName']
+const partOrdering = [
+  'firstName',
+  'middleName',
+  'maidenName',
+  'patronymic',
+  'matronymic',
+  'lastName',
+]
 
 const usingPartOrder = _.flow(
   _.get('partName'),
-  _.partialRight(_.indexOf, [partOrdering])
+  _.partialRight(_.indexOf, [partOrdering]),
 )
 
 const makeFullName = _.flow(
   _.sortBy(usingPartOrder),
   _.map(_.get('value')),
-  _.join(' ')
+  _.join(' '),
 )
 
 const makeWords = value => {
@@ -27,5 +34,5 @@ const makeWords = value => {
 
 module.exports = {
   makeFullName,
-  makeWords
+  makeWords,
 }
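Since `partOrdering` just got split across lines, a quick demonstration of how it drives `makeFullName`: this is the diffed code verbatim, exercised on invented parts.

```js
const _ = require('lodash/fp')

const partOrdering = [
  'firstName',
  'middleName',
  'maidenName',
  'patronymic',
  'matronymic',
  'lastName',
]

// Sort key: the index of a part's name within partOrdering.
const usingPartOrder = _.flow(
  _.get('partName'),
  _.partialRight(_.indexOf, [partOrdering]),
)

const makeFullName = _.flow(
  _.sortBy(usingPartOrder),
  _.map(_.get('value')),
  _.join(' '),
)

// Parts arrive in arbitrary order; the ordering restores first-before-last.
console.log(makeFullName([
  { partName: 'lastName', value: 'doe' },
  { partName: 'firstName', value: 'john' },
])) // 'john doe'
```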
@@ -25,7 +25,7 @@ const partNames = new Map([
   [MAIDEN_NAME, 'maidenName'],
   [PATRONYMIC, 'patronymic'],
   [MATRONYMIC, 'matronymic'],
-  [NICKNAME, 'nickname']
+  [NICKNAME, 'nickname'],
 ])
 
 const filteredWords = [
@@ -34,7 +34,7 @@ const filteredWords = [
 
 // group-id to type-id
 
-function processMasterNamePartGroup (groupNode) {
+function processMasterNamePartGroup(groupNode) {
   const namePartGroupNode = groupNode.NamePartGroup
   const groupId = namePartGroupNode.$.ID
   const typeId = namePartGroupNode.$.NamePartTypeID
@@ -47,7 +47,7 @@ const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => {
   const typeId = groupTypes.get(groupId)
   const partName = partNames.get(typeId)
   const value = _.lowerCase(valueNode.$text)
-  return {partName, value}
+  return { partName, value }
 })
 
 const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN)
@@ -72,29 +72,26 @@ const processAlias = _.curry((groupTypes, aliasNode) => {
   const fullName = nameUtils.makeFullName(parts)
   const words = _.flow(
     nameUtils.makeWords,
-    _.reject(_.flow(
-      _.get('value'),
-      word => filteredWords.includes(word)
-    ))
+    _.reject(_.flow(_.get('value'), word => filteredWords.includes(word))),
   )(fullName)
 
   // if (words.length < 2) {
   //   console.log(JSON.stringify(words))
   // }
 
-  return {id, parts, fullName, words}
+  return { id, parts, fullName, words }
 })
 
 // birth date
 
-function processDate (dateNode) {
+function processDate(dateNode) {
   const year = parseInt(dateNode.Year)
   const month = parseInt(dateNode.Month)
   const day = parseInt(dateNode.Day)
-  return {year, month, day}
+  return { year, month, day }
 }
 
-function processFeature (featureNode) {
+function processFeature(featureNode) {
   if (featureNode.$.FeatureTypeID !== BIRTH_DATE) return
 
   const datePeriodNode = featureNode.FeatureVersion.DatePeriod
@@ -103,7 +100,7 @@ function processFeature (featureNode) {
   // By using Start.From and End.To we use the extremes of the date-period.
   const period = {
     start: datePeriodNode.Start.From,
-    end: datePeriodNode.End.To
+    end: datePeriodNode.End.To,
   }
 
   return _.mapValues(processDate, period)
@@ -111,13 +108,16 @@ function processFeature (featureNode) {
 
 // profile
 
-function processProfile (profileNode) {
+function processProfile(profileNode) {
   if (profileNode.$.PartySubTypeID !== INDIVIDUAL) return
 
   const id = profileNode.$.ID
 
   const identityNode = profileNode.Identity
-  const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup)
+  const groupTypesEntries = _.map(
+    processMasterNamePartGroup,
+    identityNode.NamePartGroups.MasterNamePartGroup,
+  )
   const groupTypes = new Map(groupTypesEntries)
 
   const mapCompact = _.flow(_.map, _.compact)
@@ -128,7 +128,7 @@ function processProfile (profileNode) {
   if (_.isEmpty(aliases)) return
 
   const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
-  const individual = {id, aliases, birthDatePeriods}
+  const individual = { id, aliases, birthDatePeriods }
 
   return individual
 }
@@ -158,4 +158,4 @@ const parse = (source, callback) => {
   })
 }
 
-module.exports = {parse}
+module.exports = { parse }
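For orientation, the record shape these parsing functions emit, with field names taken from the hunks above and all values invented. The loader diffed earlier later adds a real `Date` to each `{year, month, day}` triple.

```js
// Invented example of one parsed individual, matching the shapes built by
// processAlias, processDate, processFeature, and processProfile above.
const individual = {
  id: '9876', // profileNode.$.ID
  aliases: [
    {
      id: '12345',
      parts: [
        { partName: 'firstName', value: 'john' },
        { partName: 'lastName', value: 'doe' },
      ],
      fullName: 'john doe',
      words: [
        { value: 'john', phonetics: ['JN', 'AN'] }, // sample metaphone codes
        { value: 'doe', phonetics: ['T'] },
      ],
    },
  ],
  birthDatePeriods: [
    {
      start: { year: 1960, month: 1, day: 1 },
      end: { year: 1960, month: 12, day: 31 },
    },
  ],
}
console.log(individual.aliases[0].fullName) // 'john doe'
```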
@@ -1,7 +1,14 @@
 const parser = require('./parsing')
 const axios = require('axios')
 const { createWriteStream } = require('fs')
-const { rename, writeFile, readFile, mkdir, copyFile, unlink } = require('fs/promises')
+const {
+  rename,
+  writeFile,
+  readFile,
+  mkdir,
+  copyFile,
+  unlink,
+} = require('fs/promises')
 const path = require('path')
 const _ = require('lodash/fp')
 
@@ -10,17 +17,21 @@ const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
 const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR, 'sources')
 const LAST_UPDATED_FILE = path.resolve(OFAC_DATA_DIR, 'last_updated.dat')
 
-const OFAC_SOURCES = [{
-  name: 'sdn_advanced',
-  url: 'https://sanctionslistservice.ofac.treas.gov/api/download/sdn_advanced.xml'
-}, {
-  name: 'cons_advanced',
-  url: 'https://sanctionslistservice.ofac.treas.gov/api/download/cons_advanced.xml'
-}]
+const OFAC_SOURCES = [
+  {
+    name: 'sdn_advanced',
+    url: 'https://sanctionslistservice.ofac.treas.gov/api/download/sdn_advanced.xml',
+  },
+  {
+    name: 'cons_advanced',
+    url: 'https://sanctionslistservice.ofac.treas.gov/api/download/cons_advanced.xml',
+  },
+]
 
 const _mkdir = path =>
-  mkdir(path)
-    .catch(err => err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err))
+  mkdir(path).catch(err =>
+    err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err),
+  )
 
 const download = (dstDir, { name, url }) => {
   const dstFile = path.join(dstDir, name + '.xml')
@@ -90,7 +101,7 @@ const moveToSourcesDir = async (srcFile, ofacSourcesDir) => {
   return dstFile
 }
 
-function update () {
+function update() {
   if (!OFAC_DATA_DIR) {
     throw new Error('ofacDataDir must be defined in the environment')
   }
@@ -118,17 +129,20 @@ function update () {
 
   if (skipUpdate) return Promise.resolve()
 
   const downloads = _.flow(
-    _.map(file => download(DOWNLOAD_DIR, file).then(parseToJson))
+    _.map(file => download(DOWNLOAD_DIR, file).then(parseToJson)),
   )(OFAC_SOURCES)
 
-  return Promise.all(downloads)
-    .then(parsed => {
-      const moves = _.map(src => moveToSourcesDir(src, OFAC_SOURCES_DIR), parsed)
-      const timestamp = new Date().toISOString()
+  return Promise.all(downloads).then(parsed => {
+    const moves = _.map(
+      src => moveToSourcesDir(src, OFAC_SOURCES_DIR),
+      parsed,
+    )
+    const timestamp = new Date().toISOString()
 
-      return Promise.all([...moves])
-        .then(() => writeFile(LAST_UPDATED_FILE, timestamp))
-    })
+    return Promise.all([...moves]).then(() =>
+      writeFile(LAST_UPDATED_FILE, timestamp),
+    )
+  })
 })
 }
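One pattern worth calling out from this last file: the reflowed `_mkdir` treats an already-existing directory as success and re-throws anything else. Extracted as a runnable sketch (the directory path is invented):

```js
const { mkdir } = require('fs/promises')

// EEXIST-tolerant mkdir, as reformatted in the hunk above: an existing
// directory resolves; any other error propagates.
const _mkdir = path =>
  mkdir(path).catch(err =>
    err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err),
  )

// Safe to call repeatedly; the second call resolves instead of throwing.
_mkdir('/tmp/ofac-sources')
  .then(() => _mkdir('/tmp/ofac-sources'))
  .then(() => console.log('ready'))
```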