chore: use monorepo organization

Rafael Taranto 2025-05-12 10:52:54 +01:00
parent deaf7d6ecc
commit a687827f7e
1099 changed files with 8184 additions and 11535 deletions

@@ -0,0 +1,73 @@
const { readdir } = require('fs/promises')
const path = require('path')
const loader = require('./loading')
const matcher = require('./matching')
const nameUtils = require('./name-utils')
const _ = require('lodash/fp')
const logger = require('../logger')
const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
let structs = null
function load () {
if (!OFAC_DATA_DIR) {
const message = 'The ofacDataDir option has not been set in the environment'
return Promise.reject(new Error(message))
}
const ofacSourcesDir = path.join(OFAC_DATA_DIR, 'sources')
return readdir(ofacSourcesDir)
.then(_.flow(
_.map(file => path.join(ofacSourcesDir, file)),
loader.load
))
.then(result => {
return (structs = result)
})
}
// nameParts should be an object like {firstName: "John", lastName: "Doe", ...}
function makeCompatible (nameParts) {
const partNames = _.keys(nameParts)
const values = _.map(_.lowerCase, _.values(nameParts))
const props = _.zipAll([partNames, values])
return _.map(_.zipObject(['partName', 'value']), props)
}
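// For illustration:
//   makeCompatible({firstName: 'John', lastName: 'Doe'})
//   // => [{partName: 'firstName', value: 'john'},
//   //     {partName: 'lastName', value: 'doe'}]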
function match (nameParts, birthDateString, options) {
if (!structs) {
logger.error(new Error('The OFAC data sources have not been loaded yet.'))
return false
}
// Prepare the input data
const parts = makeCompatible(nameParts)
const fullName = nameUtils.makeFullName(parts)
const words = nameUtils.makeWords(fullName)
// birthDateString is in YYYYMMDD format; parse it when present, otherwise leave birthDate null
const birthDate = _.cond([
[_.identity, () => {
const year = parseInt(birthDateString.slice(0, 4))
const month = parseInt(birthDateString.slice(4, 6))
const day = parseInt(birthDateString.slice(6, 8))
const date = new Date(year, month - 1, day)
return {year, month, day, date}
}],
[_.stubTrue, () => null]
])(birthDateString)
const candidate = {parts, fullName, words, birthDate}
const result = matcher.match(structs, candidate, options)
return result
}
function getStructs () {
return structs
}
module.exports = {load, match, getStructs}
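// Hypothetical usage sketch (the threshold values are illustrative only):
//   load()
//     .then(() => {
//       const suspects = match(
//         {firstName: 'John', lastName: 'Doe'},
//         '19800101',
//         {threshold: 0.85, fullNameThreshold: 0.95, ratio: 0.5}
//       )
//       console.log(suspects) // matched OFAC individuals; [] when clean
//     })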

@@ -0,0 +1,109 @@
const fs = require('fs')
const ndjson = require('ndjson')
const _ = require('lodash/fp')
const mapAliases = _.curry((iteratee, individuals) => {
const mapIndividual = individual => {
const {id, aliases} = individual
return _.map(alias => iteratee(id, alias), aliases)
}
return _.flatMap(mapIndividual, individuals)
})
const getPhoneticEntries = (individualId, alias) => {
const pairPhoneticsWithValues = word => {
const {value, phonetics} = word
const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
return _.map(makeEntry, phonetics)
}
return _.flatMap(pairPhoneticsWithValues, alias.words)
}
const producePhoneticMap = _.flow(
mapAliases(getPhoneticEntries),
_.flatten,
_.groupBy(_.get('phonetic')),
_.mapValues(_.flow(
_.map(_.get('aliasId')),
_.uniq
)),
_.toPairs,
entries => new Map(entries)
)
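// The resulting Map is keyed by phonetic code and holds unique alias ids,
// e.g. (illustrative): 'JN' => ['5931', '8452']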
const getWords = (individualId, alias) => {
const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
return _.map(pairWordsWithIds, alias.words)
}
const produceWordList = _.flow(
mapAliases(getWords),
_.flatten,
_.groupBy(_.get('value')),
_.mapValues(_.map(_.get('aliasId'))),
_.toPairs,
_.map(_.zipObject(['value', 'aliasIds']))
)
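// Yields one entry per distinct word across all aliases, e.g.
// (illustrative): {value: 'john', aliasIds: ['5931', '8452']}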
const parseSource = source => {
const individuals = []
const readStream = fs.createReadStream(source)
const jsonStream = readStream.pipe(ndjson.parse())
jsonStream.on('data', individual => {
_.forEach(period => {
_.forEach(date => {
const {year, month, day} = date
date.date = new Date(year, month - 1, day)
}, [period.start, period.end])
}, individual.birthDatePeriods)
individuals.push(individual)
})
return new Promise((resolve, reject) => {
jsonStream.on('error', reject)
jsonStream.on('end', () => {
resolve(individuals)
})
})
}
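// Each ndjson line is one JSON-encoded individual; an abbreviated,
// illustrative record:
//   {"id":"9346","aliases":[{"id":"5931","fullName":"john doe", ...}],
//    "birthDatePeriods":[{"start":{"year":1980,"month":1,"day":1}, ...}]}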
const load = sources => Promise.all(_.map(parseSource, sources))
.then(_.flow(
_.flatten,
_.compact,
_.uniqBy(_.get('id')),
individuals => {
const individualsMap = _.flow(
_.groupBy(_.get('id')),
_.mapValues(_.first),
_.toPairs,
entries => new Map(entries)
)(individuals)
const makeEntries = (individualId, alias) => [alias.id, alias]
const aliasesMap = new Map(mapAliases(makeEntries, individuals))
const getIdPairs = (individualId, alias) => [alias.id, individualId]
const idPairs = mapAliases(getIdPairs, individuals)
const aliasToIndividual = new Map(idPairs)
const phoneticMap = producePhoneticMap(individuals)
const wordList = produceWordList(individuals)
return {
individuals,
individualsMap,
aliasesMap,
aliasToIndividual,
phoneticMap,
wordList
}
}
))
module.exports = {load}

@@ -0,0 +1,108 @@
const jaro = require('talisman/metrics/distance/jaro')
const _ = require('lodash/fp')
const logger = require('../logger')
const stringSimilarity = _.curry(jaro)
// birth date
function isDateWithinSomeDaysOfPeriod (period, date, days) {
const inMillisecs = 24 * 60 * 60 * 1000
const startTime = period.start.date.getTime() - days * inMillisecs
const startDate = new Date(startTime)
const endTime = period.end.date.getTime() + days * inMillisecs
const endDate = new Date(endTime)
return (startDate < date && date < endDate)
}
const isBornTooLongSince = _.curry((days, dateObject, individual) => {
if (!dateObject) return false
if (_.isEmpty(individual.birthDatePeriods)) return false
const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days])
return !_.some(isWithinSomeYears, individual.birthDatePeriods)
})
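// e.g. isBornTooLongSince(730, {date: new Date(1980, 0, 1)}, individual)
// is true only when 1980-01-01 falls more than 730 days outside every
// one of the individual's birth-date periods.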
// algorithm
function match (structs, candidate, options) {
const {threshold, fullNameThreshold, ratio = 0.5, verboseFor} = options
const {fullName, words, birthDate} = candidate
// Accept aliases whose full name matches closely enough.
const doesNameMatch = _.flow(
_.get('fullName'),
stringSimilarity(fullName),
_.lte(fullNameThreshold)
)
const aliases = _.flatMap(_.get('aliases'), structs.individuals)
const aliasIdsFromFullName = _.flow(
_.filter(doesNameMatch),
_.map(_.get('id'))
)(aliases)
const phoneticWeight = ratio
const stringWeight = 1 - phoneticWeight
const matches = []
for (const word of words) {
const getPhonetic = phonetic => structs.phoneticMap.get(phonetic)
const phoneticMatches = new Set(_.flatMap(getPhonetic, word.phonetics))
for (const wordEntry of structs.wordList) {
const stringScore = stringSimilarity(word.value, wordEntry.value)
const verbose = _.includes(wordEntry.value, verboseFor)
for (const aliasId of wordEntry.aliasIds) {
const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1
const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
verbose && logger.debug(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value)
if (finalScore >= threshold) {
const entry = {aliasId, score: finalScore, word: word.value, value: wordEntry.value}
const index = _.sortedIndexBy(x => -x.score, entry, matches)
matches.splice(index, 0, entry)
}
}
}
}
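// Worked example with ratio = 0.5 (equal string/phonetic weighting):
// a word pair with Jaro similarity 0.90 that also shares a phonetic code
// scores 0.5 * 0.90 + 0.5 * 1 = 0.95; without the phonetic match it
// scores 0.5 * 0.90 + 0.5 * (-1) = -0.05, below any useful threshold.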
const sameWord = (a, b) => a.aliasId === b.aliasId && a.word === b.word
const sameValue = (a, b) => a.aliasId === b.aliasId && a.value === b.value
const aliasIdsFromNamePart = _.flow(
_.uniqWith(sameWord),
_.uniqWith(sameValue),
_.map(_.get('aliasId')),
_.countBy(_.identity),
_.toPairs,
_.filter(([aliasId, count]) => {
const {length} = structs.aliasesMap.get(aliasId).words
return (count >= _.min([2, words.length, length]))
}),
_.map(_.first)
)(matches)
// Get the full record for each matched id
const getIndividual = aliasId => {
const individualId = structs.aliasToIndividual.get(aliasId)
return structs.individualsMap.get(individualId)
}
const suspects = _.uniq(_.map(getIndividual, [
...aliasIdsFromFullName,
...aliasIdsFromNamePart
]))
// Reject suspects born more than two years outside every recorded birth-date period.
const twoYears = 365 * 2
const unqualified = isBornTooLongSince(twoYears, birthDate)
return _.reject(unqualified, suspects)
}
module.exports = {match}

@@ -0,0 +1,31 @@
const doubleMetaphone = require('talisman/phonetics/double-metaphone')
const _ = require('lodash/fp')
const makePhonetic = _.flow(doubleMetaphone, _.uniq)
// Combine name-parts in a standard order.
const partOrdering = ['firstName', 'middleName', 'maidenName', 'patronymic', 'matronymic', 'lastName']
const usingPartOrder = _.flow(
_.get('partName'),
_.partialRight(_.indexOf, [partOrdering])
)
const makeFullName = _.flow(
_.sortBy(usingPartOrder),
_.map(_.get('value')),
_.join(' ')
)
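// e.g. makeFullName([
//   {partName: 'lastName', value: 'doe'},
//   {partName: 'firstName', value: 'john'}
// ]) // => 'john doe'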
const makeWords = value => {
const words = _.split(' ', value)
const phonetics = _.map(makePhonetic, words)
const pairs = _.zipAll([words, phonetics])
return _.map(_.zipObject(['value', 'phonetics']), pairs)
}
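// Illustrative output (the double-metaphone codes are indicative only):
//   makeWords('john doe')
//   // => [{value: 'john', phonetics: ['JN', 'AN']},
//   //     {value: 'doe', phonetics: ['T']}]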
module.exports = {
makeFullName,
makeWords
}

@@ -0,0 +1,161 @@
const fs = require('fs')
const XmlStream = require('xml-stream')
const nameUtils = require('./name-utils')
const logger = require('../logger')
const _ = require('lodash/fp')
// KOSTIS TODO: get these from the document itself
const INDIVIDUAL = '4'
const NAME = '1403'
const BIRTH_DATE = '8'
const PRIMARY_LATIN = '1'
const LAST_NAME = '1520'
const FIRST_NAME = '1521'
const MIDDLE_NAME = '1522'
const MAIDEN_NAME = '1523'
const PATRONYMIC = '91708'
const MATRONYMIC = '91709'
const NICKNAME = '1528'
const partNames = new Map([
[LAST_NAME, 'lastName'],
[FIRST_NAME, 'firstName'],
[MIDDLE_NAME, 'middleName'],
[MAIDEN_NAME, 'maidenName'],
[PATRONYMIC, 'patronymic'],
[MATRONYMIC, 'matronymic'],
[NICKNAME, 'nickname']
])
const filteredWords = [
// 'al'
]
// group-id to type-id
function processMasterNamePartGroup (groupNode) {
const namePartGroupNode = groupNode.NamePartGroup
const groupId = namePartGroupNode.$.ID
const typeId = namePartGroupNode.$.NamePartTypeID
return [groupId, typeId]
}
const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => {
const valueNode = namePartNode.NamePartValue
const groupId = valueNode.$.NamePartGroupID
const typeId = groupTypes.get(groupId)
const partName = partNames.get(typeId)
const value = _.lowerCase(valueNode.$text)
return {partName, value}
})
const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN)
const processAlias = _.curry((groupTypes, aliasNode) => {
if (aliasNode.$.AliasTypeID !== NAME) return
if (aliasNode.$.LowQuality === 'true') return
const getNamePart = processDocumentedNamePart(groupTypes)
const latinNameNode = _.find(isLatin, aliasNode.DocumentedName)
if (!latinNameNode) {
const id = aliasNode.$.FixedRef
const message = `Alias for Person with ID="${id}" has no latinized name`
logger.error(message)
return
}
const id = latinNameNode.$.ID
const namePartNodes = latinNameNode.DocumentedNamePart
const parts = _.map(getNamePart, namePartNodes)
const fullName = nameUtils.makeFullName(parts)
const words = _.flow(
nameUtils.makeWords,
_.reject(_.flow(
_.get('value'),
word => filteredWords.includes(word)
))
)(fullName)
// if (words.length < 2) {
// console.log(JSON.stringify(words))
// }
return {id, parts, fullName, words}
})
// birth date
function processDate (dateNode) {
const year = parseInt(dateNode.Year)
const month = parseInt(dateNode.Month)
const day = parseInt(dateNode.Day)
return {year, month, day}
}
function processFeature (featureNode) {
if (featureNode.$.FeatureTypeID !== BIRTH_DATE) return
const datePeriodNode = featureNode.FeatureVersion.DatePeriod
// Both Start and End can themselves be ranges; using Start.From and
// End.To takes the extremes of the whole date period.
const period = {
start: datePeriodNode.Start.From,
end: datePeriodNode.End.To
}
return _.mapValues(processDate, period)
}
// profile
function processProfile (profileNode) {
if (profileNode.$.PartySubTypeID !== INDIVIDUAL) return
const id = profileNode.$.ID
const identityNode = profileNode.Identity
const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup)
const groupTypes = new Map(groupTypesEntries)
const mapCompact = _.flow(_.map, _.compact)
const getNameParts = processAlias(groupTypes)
const aliases = mapCompact(getNameParts, identityNode.Alias)
if (_.isEmpty(aliases)) return
const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
const individual = {id, aliases, birthDatePeriods}
return individual
}
const parse = (source, callback) => {
const stream = fs.createReadStream(source)
const xml = new XmlStream(stream)
xml.on('error', err => {
xml.pause()
const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
callback(new Error(message))
})
xml.collect('Alias')
xml.collect('DocumentedName')
xml.collect('DocumentedNamePart')
xml.collect('Feature')
xml.collect('MasterNamePartGroup')
const forwardProfile = profile => profile && callback(null, profile)
xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))
xml.on('end', () => {
callback(null, null)
})
}
module.exports = {parse}
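// Hypothetical usage sketch:
//   parse('/tmp/sdn_advanced.xml', (err, individual) => {
//     if (err) throw err
//     if (individual === null) return // a null profile signals end-of-document
//     console.log(individual.id, individual.aliases.length)
//   })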

@@ -0,0 +1,135 @@
const parser = require('./parsing')
const axios = require('axios')
const { createWriteStream } = require('fs')
const { rename, writeFile, readFile, mkdir, copyFile, unlink } = require('fs/promises')
const path = require('path')
const _ = require('lodash/fp')
const DOWNLOAD_DIR = path.resolve('/tmp')
const OFAC_DATA_DIR = process.env.OFAC_DATA_DIR
// Fall back to '' so the module can be required without OFAC_DATA_DIR set;
// update() still refuses to run until it is defined.
const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR || '', 'sources')
const LAST_UPDATED_FILE = path.resolve(OFAC_DATA_DIR || '', 'last_updated.dat')
const OFAC_SOURCES = [{
name: 'sdn_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/sdn_advanced.xml'
}, {
name: 'cons_advanced',
url: 'https://sanctionslistservice.ofac.treas.gov/api/download/cons_advanced.xml'
}]
const _mkdir = path =>
mkdir(path)
.catch(err => err.code === 'EEXIST' ? Promise.resolve() : Promise.reject(err))
const download = (dstDir, { name, url }) => {
const dstFile = path.join(dstDir, name + '.xml')
const writer = createWriteStream(dstFile)
return axios({
method: 'get',
url: url,
responseType: 'stream',
}).then(response => {
return new Promise((resolve, reject) => {
response.data.pipe(writer)
let error = null
writer.on('error', err => {
error = err
writer.close()
reject(err)
})
writer.on('close', () => {
if (!error) {
resolve(dstFile)
}
})
})
})
}
const parseToJson = srcFile => {
const dstFile = srcFile.replace(/\.xml$/, '.json')
const writeStream = createWriteStream(dstFile)
return new Promise((resolve, reject) => {
parser.parse(srcFile, (err, profile) => {
if (err) {
reject(err)
return
}
if (!profile) {
writeStream.end()
return
}
const json = JSON.stringify(profile)
writeStream.write(json + '\n', 'utf-8')
})
writeStream.on('error', reject)
writeStream.on('finish', () => resolve(dstFile))
})
}
const moveToSourcesDir = async (srcFile, ofacSourcesDir) => {
const name = path.basename(srcFile)
const dstFile = path.join(ofacSourcesDir, name)
try {
await rename(srcFile, dstFile)
} catch (err) {
if (err.code === 'EXDEV') {
// If rename fails due to a cross-device link, fall back to copy + delete
await copyFile(srcFile, dstFile)
await unlink(srcFile)
} else {
throw err
}
}
return dstFile
}
function update () {
if (!OFAC_DATA_DIR) {
throw new Error('ofacDataDir must be defined in the environment')
}
return _mkdir(OFAC_DATA_DIR)
.then(() => _mkdir(OFAC_SOURCES_DIR))
.catch(err => {
if (err.code === 'EEXIST') return
throw err
})
.then(() => readFile(LAST_UPDATED_FILE))
.then(data => {
const lastUpdate = new Date(data.toString())
const now = new Date()
const hoursSinceUpdate = (now - lastUpdate) / (1000 * 60 * 60)
return hoursSinceUpdate < 24
})
.catch(err => {
// If the timestamp file doesn't exist, continue with the update
if (err.code === 'ENOENT') return false
throw err
})
.then(skipUpdate => {
if (skipUpdate) return Promise.resolve()
const downloads = _.flow(
_.map(file => download(DOWNLOAD_DIR, file).then(parseToJson))
)(OFAC_SOURCES)
return Promise.all(downloads)
.then(parsed => {
const moves = _.map(src => moveToSourcesDir(src, OFAC_SOURCES_DIR), parsed)
const timestamp = new Date().toISOString()
return Promise.all([...moves])
.then(() => writeFile(LAST_UPDATED_FILE, timestamp))
})
})
}
module.exports = { update }
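// Hypothetical usage sketch (assumes OFAC_DATA_DIR is set):
//   require('./update').update()
//     .then(() => console.log('OFAC source files are up to date'))
//     .catch(err => console.error('OFAC update failed:', err))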