Merge pull request #1692 from siiky/refactor/lam-1122

LAM-1122 refactor: simplify OFAC code
This commit is contained in:
Rafael Taranto 2024-07-11 11:12:58 +01:00 committed by GitHub
commit 681817c2eb
4 changed files with 20 additions and 58 deletions

View file

@ -1,20 +1,15 @@
const fs = require('fs') const { readdir } = require('fs/promises')
const path = require('path') const path = require('path')
const util = require('util')
const loader = require('./loading') const loader = require('./loading')
const matcher = require('./matching') const matcher = require('./matching')
const nameUtils = require('./name-utils') const nameUtils = require('./name-utils')
const _ = require('lodash/fp') const _ = require('lodash/fp')
const logger = require('../logger') const logger = require('../logger')
const debugLog = require('../pp')(__filename) // KOSTIS TODO: remove
const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
let structs = null let structs = null
const readdir = util.promisify(fs.readdir)
function load () { function load () {
if (!OFAC_DATA_DIR) { if (!OFAC_DATA_DIR) {
const message = 'The ofacDataDir option has not been set in the environment' const message = 'The ofacDataDir option has not been set in the environment'
@ -43,8 +38,6 @@ function makeCompatible (nameParts) {
} }
function match (nameParts, birthDateString, options) { function match (nameParts, birthDateString, options) {
const {debug} = options
if (!structs) { if (!structs) {
logger.error(new Error('The OFAC data sources have not been loaded yet.')) logger.error(new Error('The OFAC data sources have not been loaded yet.'))
return false return false
@ -69,10 +62,7 @@ function match (nameParts, birthDateString, options) {
])(birthDateString) ])(birthDateString)
const candidate = {parts, fullName, words, birthDate} const candidate = {parts, fullName, words, birthDate}
debug && debugLog(candidate)
const result = matcher.match(structs, candidate, options) const result = matcher.match(structs, candidate, options)
debug && debugLog(result)
return result return result
} }

View file

@ -1,7 +1,6 @@
const jaro = require('talisman/metrics/distance/jaro') const jaro = require('talisman/metrics/distance/jaro')
const _ = require('lodash/fp') const _ = require('lodash/fp')
const debugLog = require('../pp')(__filename) // KOSTIS TODO: remove
const logger = require('../logger') const logger = require('../logger')
const stringSimilarity = _.curry(jaro) const stringSimilarity = _.curry(jaro)
@ -30,7 +29,7 @@ const isBornTooLongSince = _.curry((days, dateObject, individual) => {
// algorithm // algorithm
function match (structs, candidate, options) { function match (structs, candidate, options) {
const {threshold, fullNameThreshold, ratio = 0.5, debug, verboseFor} = options const {threshold, fullNameThreshold, ratio = 0.5, verboseFor} = options
const {fullName, words, birthDate} = candidate const {fullName, words, birthDate} = candidate
// Accept aliases whose full name matches. // Accept aliases whose full name matches.
@ -90,9 +89,6 @@ function match (structs, candidate, options) {
_.map(_.first) _.map(_.first)
)(matches) )(matches)
debug && debugLog(aliasIdsFromFullName)
debug && debugLog(aliasIdsFromNamePart)
// Get the full record for each matched id // Get the full record for each matched id
const getIndividual = aliasId => { const getIndividual = aliasId => {
const individualId = structs.aliasToIndividual.get(aliasId) const individualId = structs.aliasToIndividual.get(aliasId)

View file

@ -4,8 +4,6 @@ const nameUtils = require('./name-utils')
const logger = require('../logger') const logger = require('../logger')
const _ = require('lodash/fp') const _ = require('lodash/fp')
const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
// KOSTIS TODO: get these from the document itself // KOSTIS TODO: get these from the document itself
const INDIVIDUAL = '4' const INDIVIDUAL = '4'
const NAME = '1403' const NAME = '1403'
@ -132,8 +130,6 @@ function processProfile (profileNode) {
const birthDatePeriods = mapCompact(processFeature, profileNode.Feature) const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
const individual = {id, aliases, birthDatePeriods} const individual = {id, aliases, birthDatePeriods}
// debug_log(individual)
return individual return individual
} }

View file

@ -1,9 +1,10 @@
const parser = require('./parsing') const parser = require('./parsing')
const https = require('https') const https = require('https')
const url = require('url') const URL = require('url')
const fs = require('fs') const { createWriteStream } = require('fs')
const fs = require('fs/promises')
const { readFile, writeFile, rename, unlink } = fs
const path = require('path') const path = require('path')
const util = require('util')
const _ = require('lodash/fp') const _ = require('lodash/fp')
const logger = require('../logger') const logger = require('../logger')
@ -14,36 +15,17 @@ const OFAC_SOURCES_NAMES = process.env.OFAC_SOURCES_NAMES.split(',')
const OFAC_SOURCES_URLS = process.env.OFAC_SOURCES_URLS.split(',') const OFAC_SOURCES_URLS = process.env.OFAC_SOURCES_URLS.split(',')
const ofacSources = _.map( const ofacSources = _.map(
it => ({ ([name, url]) => ({ name, url }),
name: it[0],
url: it[1]
}),
_.zip(OFAC_SOURCES_NAMES, OFAC_SOURCES_URLS) _.zip(OFAC_SOURCES_NAMES, OFAC_SOURCES_URLS)
) )
function mkdir (path) { const mkdir = path =>
return new Promise((resolve, reject) => { fs.mkdir(path)
fs.mkdir(path, err => { .catch(err => err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err))
if (!err) return resolve()
if (err.code === 'EEXIST') return resolve()
reject(err)
})
})
}
const readFile = util.promisify(fs.readFile) const promiseGetEtag = ({ url }) =>
const writeFile = util.promisify(fs.writeFile) new Promise((resolve, reject) => {
const rename = util.promisify(fs.rename) const parsed = URL.parse(url)
const unlink = util.promisify(fs.unlink)
const remove = file => {
return unlink(file)
}
const promiseGetEtag = (source) => {
return new Promise((resolve, reject) => {
const {url: sourceUrl} = source
const parsed = url.parse(sourceUrl)
const requestOptions = { const requestOptions = {
hostname: parsed.hostname, hostname: parsed.hostname,
path: parsed.path, path: parsed.path,
@ -59,26 +41,24 @@ const promiseGetEtag = (source) => {
request.end() request.end()
}) })
}
const download = _.curry((dstDir, source) => { const download = (dstDir, { name, url }) => {
const {name, url: sourceUrl} = source
const dstFile = path.join(dstDir, name + '.xml') const dstFile = path.join(dstDir, name + '.xml')
const file = fs.createWriteStream(dstFile) const file = createWriteStream(dstFile)
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const request = https.get(sourceUrl, response => { const request = https.get(url, response => {
response.pipe(file) response.pipe(file)
file.on('finish', () => file.close(() => resolve(dstFile))) file.on('finish', () => file.close(() => resolve(dstFile)))
}) })
request.on('error', reject) request.on('error', reject)
}) })
}) }
const parseToJson = srcFile => { const parseToJson = srcFile => {
const dstFile = srcFile.replace(/\.xml$/, '.json') const dstFile = srcFile.replace(/\.xml$/, '.json')
const writeStream = fs.createWriteStream(dstFile) const writeStream = createWriteStream(dstFile)
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
parser.parse(srcFile, (err, profile) => { parser.parse(srcFile, (err, profile) => {
@ -137,7 +117,7 @@ function update () {
const promiseNewEtags = Promise.resolve(ofacSources || []) const promiseNewEtags = Promise.resolve(ofacSources || [])
.then(sources => Promise.all(_.map(promiseGetEtag, sources)) .then(sources => Promise.all(_.map(promiseGetEtag, sources))
.then(etags => _.map( .then(etags => _.map(
([source, etag]) => ({...source, etag}), ([source, etag]) => _.set('etag', etag, source),
_.zip(sources, etags) _.zip(sources, etags)
)) ))
) )
@ -166,7 +146,7 @@ function update () {
return Promise.all(downloads) return Promise.all(downloads)
.then(parsed => { .then(parsed => {
const moves = _.map(src => moveToSourcesDir(src, OFAC_SOURCES_DIR), parsed) const moves = _.map(src => moveToSourcesDir(src, OFAC_SOURCES_DIR), parsed)
const deletions = _.map(remove, missing) const deletions = _.map(unlink, missing)
const updateEtags = writeFile(OFAC_ETAGS_FILE, etagsJson) const updateEtags = writeFile(OFAC_ETAGS_FILE, etagsJson)
return Promise.all([updateEtags, ...moves, ...deletions]) return Promise.all([updateEtags, ...moves, ...deletions])