chore: use monorepo organization

2025-05-12 10:52:54 +01:00 · 2025-05-12 10:52:54 +01:00 · a687827f7e
commit a687827f7e
parent deaf7d6ecc
1099 changed files with 8184 additions and 11535 deletions
--- a/packages/server/tests/ofac/dist.all.last.txt
+++ b/packages/server/tests/ofac/dist.all.last.txt
--- a/packages/server/tests/ofac/dist.female.first.txt
+++ b/packages/server/tests/ofac/dist.female.first.txt
--- a/packages/server/tests/ofac/dist.male.first.txt
+++ b/packages/server/tests/ofac/dist.male.first.txt
--- a/packages/server/tests/ofac/matching.js
+++ b/packages/server/tests/ofac/matching.js
@ -0,0 +1,362 @@
+const assert = require('assert')
+const ofac = require('../../lib/ofac')
+const fs = require('fs')
+const path = require('path')
+const _ = require('lodash/fp')
+
+let structs
+let fullNames
+
+const rand = N => _.random(0, N - 1)
+
+const letters = _.range('a'.charCodeAt(0), 'z'.charCodeAt(0))
+const vowels = _.map(c => c.charCodeAt(0), ['a', 'e', 'i', 'o', 'u'])
+const consonants = _.difference(letters, vowels)
+
+const duplicate = (word, index) => {
+  const c = word[index]
+  return _.join('', [word.slice(0, index), c, c, word.slice(index + 1)])
+}
+
+const remove = (word, index) => {
+  return _.join('', [word.slice(0, index), word.slice(index + 1)])
+}
+
+const transpose = (word, index) => {
+  const a = word[index]
+  const b = word[index + 1] || ' '
+  return _.join('', [word.slice(0, index), b, a, word.slice(index + 2)])
+}
+
+const alter = (word, index) => {
+  if (word[index] === ' ') return word
+  const o = word.charCodeAt(index)
+  const collection = _.includes(o, vowels) ? vowels : consonants
+  const oo = _.sample(collection)
+  const cc = String.fromCharCode(oo)
+  return _.join('', [word.slice(0, index), cc, word.slice(index + 1)])
+}
+
+const misspellOps = [
+  duplicate,
+  remove,
+  transpose,
+  alter
+]
+
+const misspell = word => {
+  const len = word.length
+  const index = _.random(1, len)
+  const operation = _.sample(misspellOps)
+  return operation(word, index)
+}
+
+const misspellRandomly = word => {
+  const len = word.length
+  const num = _.random(1, Math.sqrt(len))
+  return _.flow(..._.times(() => misspell, num))(word)
+}
+
+
+// Replaces one randomly chosen vowel of `word` with a neighbouring entry of
+// the `vowels` list (a, e, i, o, u order). Returns false when `word` has no
+// character found in `vowels`, so callers can tell nothing was changed.
+const shiftVowel = word => {
+  // Build {letter, index, vowelIndex} for every character of the word, then
+  // keep only the ones that are vowels (vowelIndex !== -1).
+  const indexedVowels = _.flow(
+    _.get('length'),
+    _.range(0),
+    _.zip(_.split('', word)),
+    _.map(_.zipObject(['letter', 'index'])),
+    _.map(indexedLetter => {
+      const ord = indexedLetter.letter.charCodeAt(0)
+      const vowelIndex = _.indexOf(ord, vowels)
+      return {...indexedLetter, vowelIndex}
+    }),
+    _.reject(_.flow(
+      _.get('vowelIndex'),
+      _.eq(-1)
+    ))
+  )(word)
+
+  if (_.isEmpty(indexedVowels)) return false
+
+  const indexedVowel = _.sample(indexedVowels)
+  // The first vowel of the list can only shift forward and the last one only
+  // backward; the others may go either way.
+  const options = indexedVowel.vowelIndex === 0 ? [ +1 ]
+                : indexedVowel.vowelIndex === 4 ? [ -1 ]
+                : [ -1, +1 ]
+  const offset = _.sample(options)
+  const replacementOrd = vowels[indexedVowel.vowelIndex + offset]
+  const replacement = String.fromCharCode(replacementOrd)
+
+  // Splice the replacement vowel in at the position of the chosen one.
+  const index = indexedVowel.index
+  return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)])
+}
+
+const makeReplacer = (a, b) => word => {
+  const replaced = word.replace(a, b)
+  return (replaced !== word) && replaced
+}
+
+const makeReplacerPair = (a, b) => [
+  makeReplacer(a, b),
+  makeReplacer(b, a),
+]
+
+const equivalences = [
+  shiftVowel,
+  ...makeReplacerPair('v', 'f'),
+  ...makeReplacerPair('ph', 'f'),
+  ...makeReplacerPair('ck', 'k'),
+  ...makeReplacerPair('q', 'k')
+]
+
+const transcribe = word => {
+  const ops = _.shuffle(equivalences)
+  for (const op of ops) {
+    const transcribed = op(word)
+    if (transcribed) return transcribed
+  }
+}
+
+// Values passed as the `threshold` / `fullNameThreshold` options of
+// `ofac.match` by the tests below.
+const threshold = 0.85
+const fullNameThreshold = 0.95
+
+describe('OFAC', function () {
+  describe('Matching', function () {
+
+    before(function () {
+      this.timeout(60000)
+      return ofac.load()
+        .then(result => {
+          structs = result
+          const {individuals} = structs
+          fullNames = _.flow(
+            _.flatMap('aliases'),
+            _.map('fullName')
+          )(individuals)
+        })
+    })
+
+    it.skip('should match the exact full names of suspects', function () {
+      this.timeout(0)
+
+      for (const fullName of fullNames) {
+        const matches = ofac.match({firstName: fullName}, null, {
+          threshold,
+          fullNameThreshold,
+        })
+        assert.ok(!_.isEmpty(matches))
+      }
+    })
+
+    it.skip('should match the permutated full names of suspects', function () {
+      this.timeout(0)
+
+      for (const fullName of fullNames) {
+        const reversed = _.flow(
+          _.split(' '),
+          _.reverse,
+          _.join(' ')
+        )(fullName)
+
+        const matches = ofac.match({firstName: reversed}, null, {
+          threshold,
+          fullNameThreshold,
+        })
+        assert.ok(!_.isEmpty(matches))
+      }
+    })
+
+    // For every cached full name, two variants are looked up: a lightly
+    // misspelled one (a single edit on the whole name) and a heavily misspelled
+    // one (one edit per space-separated word, with a threshold lowered by 0.1).
+    // Both lookups must return matches for every name.
+    it('should match despite some misspellings', function () {
+      this.timeout(0)
+
+      let countMatches = 0
+      const failures = []
+
+      for (const fullName of fullNames) {
+        const lightlyMisspelled = misspell(fullName)
+
+        const heavilyMisspelled = _.flow(
+          _.split(' '),
+          _.map(misspell),
+          _.join(' ')
+        )(fullName)
+
+        const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
+          threshold,
+          fullNameThreshold,
+        })
+
+        if (!_.isEmpty(matchesA)) {
+          countMatches += 1
+        }
+        else {
+          failures.push({fullName, misspelled: lightlyMisspelled})
+        }
+
+        // Note: no fullNameThreshold is passed for the heavy variant.
+        const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
+          threshold: threshold - 0.1,//: 0.75
+        })
+
+        if (!_.isEmpty(matchesB)) {
+          countMatches += 1
+        }
+        else {
+          failures.push({fullName, heavy: true, misspelled: heavilyMisspelled})
+        }
+      }
+
+      // Repeat each failed lookup with the `debug` option set (presumably so
+      // the matcher prints diagnostics) before the final assertion fails.
+      for (const failure of failures) {
+        const {fullName, heavy, misspelled} = failure
+        console.log("Original:", fullName)
+        ofac.match({firstName: misspelled}, null, {
+          threshold: threshold + (heavy ? -0.1 : 0),
+          debug: true
+        })
+      }
+
+      // Two lookups per name, so every one of them must have matched.
+      assert.equal(countMatches, fullNames.length * 2)
+    })
+
+    // Each cached full name is rewritten with `transcribe` and looked up; every
+    // rewritten name must return matches.
+    it('should match phonetically similar words', function () {
+      this.timeout(0)
+
+      let countMatches = 0
+      const failures = []
+
+      for (const fullName of fullNames) {
+        const transcribed = transcribe(fullName)
+
+        // Names that no equivalence can alter are counted as matched so that
+        // the final equality against fullNames.length still holds.
+        if (!transcribed) {
+          console.warn(`Couldn't find an appropriate phonetic alteration for '${fullName}'`)
+          countMatches += 1
+          continue
+        }
+
+        const matches = ofac.match({firstName: transcribed}, null, {
+          threshold,
+          fullNameThreshold,
+        })
+
+        if (!_.isEmpty(matches)) {
+          countMatches += 1
+        }
+        else {
+          failures.push({fullName, misspelled: transcribed})
+        }
+      }
+
+      // Repeat each failed lookup with the `debug` option set (presumably so
+      // the matcher prints diagnostics) before the final assertion fails.
+      for (const failure of failures) {
+        const {fullName, misspelled} = failure
+        console.log("Original:", fullName)
+        ofac.match({firstName: misspelled}, null, {
+          threshold,
+          fullNameThreshold,
+          debug: true
+        })
+      }
+
+      assert.equal(countMatches, fullNames.length)
+    })
+
+    // Looks up every cached full name together with today's date (formatted as
+    // YYYYMMDD) as the birth date. Any match that is still returned must have
+    // nothing but empty entries in its `birthDatePeriods`.
+    it('should discard matches with inapropriate birthdates', function () {
+      this.timeout(0)
+
+      // Zero-padded YYYYMMDD string for the current date.
+      const date = new Date()
+      const YYYY = _.padCharsStart('0', 4, date.getFullYear())
+      const MM = _.padCharsStart('0', 2, date.getMonth() + 1)
+      const DD = _.padCharsStart('0', 2, date.getDate())
+      const dateString = `${YYYY}${MM}${DD}`
+
+      // True when, for every match, every element of `birthDatePeriods` is empty.
+      const noMatchesWithBirthDates = _.every(_.flow(
+        _.get('birthDatePeriods'),
+        _.every(_.isEmpty)
+      ))
+
+      for (const fullName of fullNames) {
+        const matches = ofac.match({firstName: fullName}, dateString, {
+          threshold,
+          fullNameThreshold,
+        })
+        assert.ok(noMatchesWithBirthDates(matches))
+      }
+    })
+
+    it('should not match against common names', function () {
+      this.timeout(0)
+
+      const getNamesFromFile = _.flow(
+        name => path.resolve(__dirname, name),
+        file => fs.readFileSync(file, 'utf-8'),
+        _.split('\n'),
+        _.map( _.flow(
+          _.split(' '),
+          _.first
+        ))
+      )
+
+      const lastNames = getNamesFromFile('dist.all.last.txt')
+      const firstNamesMale = getNamesFromFile('dist.male.first.txt')
+      const firstNamesFemale = getNamesFromFile('dist.female.first.txt')
+
+      let countMatches = 0
+      const failures = []
+
+      for (const lastName of lastNames.slice(0, 100)) {
+        for (firstName of firstNamesMale.slice(0, 100)) {
+          const matches = ofac.match({firstName, lastName}, null, {
+            threshold,
+            fullNameThreshold,
+          })
+
+          if (!_.isEmpty(matches)) {
+            countMatches += 1
+            failures.push({firstName, lastName})
+          }
+        }
+
+        for (firstName of firstNamesFemale.slice(0, 100)) {
+          const matches = ofac.match({firstName, lastName}, null, {
+            threshold,
+            fullNameThreshold,
+          })
+
+          if (!_.isEmpty(matches)) {
+            countMatches += 1
+            failures.push({firstName, lastName})
+          }
+        }
+      }
+
+      for (const failure of failures) {
+        ofac.match(failure, null, {
+          threshold,
+          fullNameThreshold,
+          debug: true
+        })
+      }
+
+      assert.equal(countMatches, 0)
+    })
+
+
+    // Skipped manual probe: a single lookup with the `debug` and `verboseFor`
+    // options set; it contains no assertions.
+    it.skip('test', function () {
+      const firstName = 'hian chariapaporn'
+      ofac.match({firstName}, null, {
+        threshold,
+        fullNameThreshold,
+        debug: true,
+        verboseFor: ['hiran', 'chariapaporn']
+      })
+    })
+
+
+    // Skipped manual probe: a single lookup with the `debug` and `verboseFor`
+    // options set; it contains no assertions.
+    it.skip('test', function () {
+      const firstName = 'janice smith'
+      ofac.match({firstName}, null, {
+        threshold,
+        fullNameThreshold,
+        debug: true,
+        verboseFor: ['samih', 'anis']
+      })
+    })
+
+  })
+})
--- a/packages/server/tests/ofac/parsing.js
+++ b/packages/server/tests/ofac/parsing.js
@ -0,0 +1,188 @@
+const assert = require('assert')
+const parser = require('../../lib/ofac/parsing')
+
+const fs = require('fs')
+const path = require('path')
+const util = require('util')
+const _ = require('lodash/fp')
+
+const randomTmpFileName = () => path.join('/tmp', Math.random().toString())
+
+const writeFile = util.promisify(fs.writeFile)
+
+function makeDataFiles (contents) {
+  const fileNames = _.map(randomTmpFileName, Array(contents.length))
+  const pairs = _.zip(fileNames, contents)
+  return Promise.all(_.map(_.spread(writeFile), pairs))
+  .then(() => fileNames)
+}
+
+
+// Maps a function over a collection and concatenates the results into one
+// string with no separator; called as mapLines(fn, items).
+const mapLines = _.flow(_.map, _.join(''))
+
+const partIds = new Map([
+  ['lastName',    1520],
+  ['firstName',   1521],
+  ['middleName',  1522],
+  ['maidenName',  1523],
+  ['patronymic', 91708],
+  ['matronymic', 91709],
+  ['nickname',    1528]
+])
+
+const getId = part => partIds.get(part.partName)
+
+// Renders one name part as a <DocumentedNamePart> element; the group ID comes
+// from `getId(part)` and the element text from `part.value`.
+const makePart = part => '' +
+`					<DocumentedNamePart>
+						<NamePartValue NamePartGroupID="${getId(part)}">${part.value}</NamePartValue>
+					</DocumentedNamePart>`
+
+const makeAlias = alias => '' +
+`			<Alias AliasTypeID="1403">
+				<DocumentedName ID="${alias.id}" DocNameStatusID="1">
+${_.map(makePart, alias.parts)}
+				</DocumentedName>
+			</Alias>`
+
+// Renders the <MasterNamePartGroup> element for one name part; `getId(part)`
+// is used for both the ID and the NamePartTypeID attribute.
+const makePartGroup = part => '' +
+`				<MasterNamePartGroup>
+					<NamePartGroup
+						ID="${getId(part)}"
+						NamePartTypeID="${getId(part)}"/>
+				</MasterNamePartGroup>`
+
+// Renders the part groups for every part of an alias, concatenated.
+const makePartGroups = alias => mapLines(makePartGroup, alias.parts)
+
+// Renders one birth date period as a <Feature FeatureTypeID="8"> element.
+// `birthDate.start` fills both From and To of <Start>, and `birthDate.end`
+// fills both From and To of <End>; each is read as {year, month, day}.
+const makeBirthDate = birthDate => '' +
+`		<Feature FeatureTypeID="8">
+			<FeatureVersion>
+				<DatePeriod>
+					<Start>
+						<From>
+							<Year>${birthDate.start.year}</Year>
+							<Month>${birthDate.start.month}</Month>
+							<Day>${birthDate.start.day}</Day>
+						</From>
+						<To>
+							<Year>${birthDate.start.year}</Year>
+							<Month>${birthDate.start.month}</Month>
+							<Day>${birthDate.start.day}</Day>
+						</To>
+					</Start>
+					<End>
+						<From>
+							<Year>${birthDate.end.year}</Year>
+							<Month>${birthDate.end.month}</Month>
+							<Day>${birthDate.end.day}</Day>
+						</From>
+						<To>
+							<Year>${birthDate.end.year}</Year>
+							<Month>${birthDate.end.month}</Month>
+							<Day>${birthDate.end.day}</Day>
+						</To>
+					</End>
+				</DatePeriod>
+			</FeatureVersion>
+		</Feature>`
+
+// Renders one individual as a <Profile> element: its aliases and their name
+// part groups inside <Identity>, followed by its birth date periods.
+const makeProfile = profile => {
+  return '' +
+` <Profile ID="${profile.id}" PartySubTypeID="4">
+		<Identity>
+${mapLines(makeAlias, profile.aliases)}
+			<NamePartGroups>
+${mapLines(makePartGroups, profile.aliases)}
+			</NamePartGroups>
+		</Identity>
+${mapLines(makeBirthDate, profile.birthDatePeriods)}
+	</Profile>`
+}
+
+// Wraps the rendered profiles in an XML declaration and a <doc> root element.
+const makeXml = profiles => '' +
+`<?xml version="1.0" encoding="utf-8"?>
+<doc>
+${mapLines(makeProfile, profiles)}
+</doc>`
+
+
+// Fixtures used both as input to `makeXml` and as the expected parser output.
+// The XML builders read only `id`, `aliases[].id`, `aliases[].parts` and
+// `birthDatePeriods`; `fullName` and `words` take part only in the deepEqual
+// comparison against the parsed result.
+const individualA = {id: '9', aliases: [{id: '5',
+  parts: [
+    {partName: 'firstName', value: 'john'},
+    {partName: 'lastName', value: 'doe'}],
+  fullName: 'john doe',
+  words: [
+    {value: 'john', phonetics: ['JN', 'AN']},
+    {value: 'doe', phonetics: ['T']}]}],
+  birthDatePeriods: [{
+    start: {year: 1955, month: 10, day: 5},
+    end: {year: 1955, month: 10, day: 5}}]
+}
+
+// Second fixture: same shape as individualA, with an additional middle name.
+const individualB = {id: '11', aliases: [{id: '15',
+  parts: [
+    {partName: 'firstName', value: 'john'},
+    {partName: 'middleName', value: 'de'},
+    {partName: 'lastName', value: 'gaul'}],
+  fullName: 'john de gaul',
+  words: [
+    {value: 'john', phonetics: ['JN', 'AN']},
+    {value: 'de', phonetics: ['T']},
+    {value: 'gaul', phonetics: ['KL']}]}],
+  birthDatePeriods: [{
+    start: {year: 1965, month: 11, day: 20},
+    end: {year: 1965, month: 11, day: 20}}]
+}
+
+
+const parseIndividuals = source => {
+  const individuals = []
+
+  return new Promise((resolve, reject) => {
+    parser.parse(source, (err, profile) => {
+      if (err) {
+        reject(err)
+        return
+      }
+
+      if (!profile) {
+        resolve(individuals)
+        return
+      }
+
+      individuals.push(profile)
+    })
+  })
+}
+
+
+describe('OFAC', function () {
+  describe('Parsing', function () {
+
+    // To detect botched downloads
+    // Parsing a file with mismatched tags must reject with an Error. The catch
+    // handler turns that rejection into the marker value 'failed'; a parse that
+    // unexpectedly succeeds resolves with something else and fails the final
+    // assertion.
+    it('should fail on malformed XML', function () {
+      const xml = '<a><b></a>'
+      return makeDataFiles([xml])
+      .then(files => Promise.all(_.map(parseIndividuals, files)))
+      .catch(error => {
+        assert.ok(error instanceof Error)
+        return 'failed'
+      })
+      .then(ret => {
+        assert.equal(ret, 'failed')
+      })
+    })
+
+    // Round trip: render the two fixtures to XML, write it to a temporary
+    // file, parse that file and expect the fixtures back unchanged.
+    it('should return the expected individuals', function () {
+      const xml = makeXml([individualA, individualB])
+
+      return makeDataFiles([xml])
+      .then(files => Promise.all(_.map(parseIndividuals, files)))
+      // Only one file was written, so the result holds one list of individuals.
+      .then(([individuals]) => {
+        assert.ok(Array.isArray(individuals))
+        assert.equal(individuals.length, 2)
+        assert.deepEqual(individuals, [individualA, individualB])
+      })
+    })
+
+  })
+})