Parsing moved to downloading. Matching is being tweaked.

parent 793db0f449
commit b72f5549a5

10 changed files with 456 additions and 276 deletions
@@ -65,7 +65,8 @@ openssl x509 \
 rm /tmp/Lamassu_OP.csr.pem
 
-mkdir -p $OFAC_DATA_DIR
+mkdir -p $OFAC_DATA_DIR/sources
+touch $OFAC_DATA_DIR/etags.json
 
 cat <<EOF > $CONFIG_DIR/lamassu.json
 {
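For orientation, the data directory this prepares ends up looking roughly like this once update.js (added below) has run; sources.json is read by update() but is not created in this hunk, and the file name under sources/ is illustrative:

    $OFAC_DATA_DIR/
        etags.json       # last seen ETag per source, rewritten by update()
        sources.json     # {"sources": [{name, url}, ...]}, consumed by update()
        sources/         # one ndjson file per source, read by loading.js
            <name>.json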
@@ -1,7 +1,7 @@
 const fs = require('fs')
 const path = require('path')
 const util = require('util')
-const parser = require('./parsing')
+const loader = require('./loading')
 const matcher = require('./matching')
 const nameUtils = require('./name-utils')
 const options = require('../options')
@@ -9,7 +9,7 @@ const _ = require('lodash/fp')
 
 const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
 
-const OFAC_DATA_DIR = options.ofacDataDir
+const OFAC_SOURCES_DIR = path.join(options.ofacDataDir, 'sources')
 
 let structs = null
 
@@ -18,15 +18,15 @@ const readdir = util.promisify(fs.readdir)
 function load () {
   // NOTE: Not sure how you push code updates to existing clients. This problem
   // might pop up if new code is pushed, without re-doing setup.
-  if (!OFAC_DATA_DIR) {
+  if (!OFAC_SOURCES_DIR) {
     const message = 'The ofacDataDir option has not been set in lamassu.json'
     return Promise.reject(new Error(message))
   }
 
-  return readdir(OFAC_DATA_DIR)
+  return readdir(OFAC_SOURCES_DIR)
     .then(_.flow(
-      _.map(file => path.join(OFAC_DATA_DIR, file)),
-      parser.parse
+      _.map(file => path.join(OFAC_SOURCES_DIR, file)),
+      loader.load
     ))
     .then(result => {
      return (structs = result)
@@ -42,7 +42,8 @@ function makeCompatible (nameParts) {
   return _.map(_.zipObject(['partName', 'value']), props)
 }
 
-function match (nameParts, birthDateString, threshold) {
+function match (nameParts, birthDateString, options) {
+  const {debug} = options
   if (!structs) {
     const message = 'The OFAC data sources have not been loaded yet.'
     return Promise.reject(new Error(message))
@@ -68,10 +69,10 @@ function match (nameParts, birthDateString, threshold) {
   ])(birthDateString)
 
   const candidate = {parts, fullName, words, birthDate}
-  // debug_log(candidate)
+  debug && debug_log(candidate)
 
-  const result = matcher.match(structs, candidate, threshold)
-  // debug_log(result)
+  const result = matcher.match(structs, candidate, options)
+  debug && debug_log(result)
   return result
 }
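The public match() entry point now takes an options object instead of a bare threshold. A minimal sketch of the new call shape, with illustrative values and an assumed module path not shown in this commit; the option names follow the code above and the tests below:

    const ofac = require('./lib/ofac')

    ofac.load().then(() => {
      const matches = ofac.match({firstName: 'john', lastName: 'doe'}, null, {
        threshold: 0.85, // cut-off for a word to count as matching
        ratio: 0.1,      // optional phonetic weight, defaults to 0.1 in matching.js
        debug: false     // optional, routes candidate/result through debug_log
      })
      console.log(matches)
    })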
lib/ofac/loading.js (new file, 103 lines)
@@ -0,0 +1,103 @@
+const fs = require('fs')
+const ndjson = require('ndjson')
+const _ = require('lodash/fp')
+
+const mapAliases = _.curry((iteratee, individuals) => {
+  const mapIndividual = individual => {
+    const {id, aliases} = individual
+    return _.map(alias => iteratee(id, alias), aliases)
+  }
+  return _.flatMap(mapIndividual, individuals)
+})
+
+const getPhoneticEntries = (individualId, alias) => {
+  const pairPhoneticsWithValues = word => {
+    const {value, phonetics} = word
+    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
+    return _.map(makeEntry, phonetics)
+  }
+  return _.flatMap(pairPhoneticsWithValues, alias.words)
+}
+
+const producePhoneticMap = _.flow(
+  mapAliases(getPhoneticEntries),
+  _.flatten,
+  _.groupBy(_.get('phonetic')),
+  _.mapValues(_.flow(
+    _.map(_.get('aliasId')),
+    _.uniq
+  )),
+  _.toPairs,
+  entries => new Map(entries)
+)
+
+const getWords = (individualId, alias) => {
+  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
+  return _.map(pairWordsWithIds, alias.words)
+}
+
+const produceWordList = _.flow(
+  mapAliases(getWords),
+  _.flatten,
+  _.groupBy(_.get('value')),
+  _.mapValues(_.map(_.get('aliasId'))),
+  _.toPairs,
+  _.map(_.zipObject(['value', 'aliasIds']))
+)
+
+const parseSource = source => {
+  const individuals = []
+
+  const readStream = fs.createReadStream(source)
+  const jsonStream = readStream.pipe(ndjson.parse())
+  jsonStream.on('data', individual => {
+    individuals.push(individual)
+  })
+
+  return new Promise((resolve, reject) => {
+    jsonStream.on('error', reject)
+    jsonStream.on('end', () => {
+      resolve(individuals)
+    })
+  })
+}
+
+const load = sources => Promise.all(_.map(parseSource, sources))
+  .then(_.flow(
+    _.flatten,
+    _.compact,
+    _.uniqBy(_.get('id')),
+    individuals => {
+      const individualsMap = _.flow(
+        _.groupBy(_.get('id')),
+        _.mapValues(_.first),
+        _.toPairs,
+        entries => new Map(entries)
+      )(individuals)
+
+      const makeEntries = (individualId, alias) => [alias.id, alias]
+      const aliasesMap = new Map(mapAliases(makeEntries, individuals))
+
+      const getIdPairs = (individualId, alias) => [alias.id, individualId]
+      const idPairs = mapAliases(getIdPairs, individuals)
+      const aliasToIndividual = new Map(idPairs)
+
+      const phoneticMap = producePhoneticMap(individuals)
+      const wordList = produceWordList(individuals)
+
+      return {
+        individuals,
+        individualsMap,
+        aliasesMap,
+        aliasToIndividual,
+        phoneticMap,
+        wordList
+      }
+    }
+  ))
+
+module.exports = {load}
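loading.js expects newline-delimited JSON, one individual per line, as written by the parseToJson step of update.js below. A sketch of one input line, trimmed to the fields the loader and matcher actually read, using the ids and words of the test fixture individualA; the file path in the call is illustrative:

    {"id":"9","aliases":[{"id":"5","words":[{"value":"john","phonetics":["JN","AN"]},{"value":"doe","phonetics":["T"]}]}]}

    const loader = require('./lib/ofac/loading')

    loader.load(['/var/lib/ofac/sources/source.json']).then(structs => {
      // structs.phoneticMap: Map of phonetic code -> [aliasId, ...]
      // structs.wordList:    [{value, aliasIds}, ...]
      console.log(structs.individuals.length)
    })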
@@ -28,7 +28,8 @@ const isBornTooLongSince = _.curry((days, dateObject, individual) => {
 
 // algorithm
 
-function match (structs, candidate, threshold) {
+function match (structs, candidate, options) {
+  const {threshold, ratio = 0.1, debug, verboseFor} = options
   const {fullName, words, birthDate} = candidate
 
   // Accept aliases who's full name matches.
@@ -44,42 +45,57 @@ function match (structs, candidate, threshold) {
   )(aliases)
 
-  const aliasIds = []
-  const phoneticWeight = 0.17
+  const aliasIdCounts = new Map()
+  const phoneticWeight = ratio
   const stringWeight = 1 - phoneticWeight
 
   for (const word of words) {
     const getPhonetic = phonetic => structs.phoneticMap.get(phonetic)
     const phoneticMatches = new Set(_.flatMap(getPhonetic, word.phonetics))
 
+    const aliasIds = new Set()
+
     for (const wordEntry of structs.wordList) {
       const stringScore = stringSimilarity(word.value, wordEntry.value)
 
-      if (stringWeight * stringScore + phoneticWeight < threshold) continue
+      const verbose = _.includes(wordEntry.value, verboseFor)
+
+      if (!verbose && stringWeight * stringScore + phoneticWeight < threshold) continue
 
       for (const aliasId of wordEntry.aliasIds) {
-        const phoneticScore = phoneticMatches.has(aliasId) ? 1 : 0
-        const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
+        const phoneticScore = phoneticMatches.has(aliasId) ? 1 : -1
+        // const finalScore = stringWeight * stringScore + phoneticWeight * phoneticScore
+        const finalScore = stringScore + phoneticWeight * phoneticScore
+
+        verbose && console.log(finalScore.toFixed(2), stringScore.toFixed(2), phoneticScore.toFixed(2), word.value, wordEntry.value)
+
         if (finalScore >= threshold) {
-          aliasIds.push(aliasId)
+          aliasIds.add(aliasId)
         }
       }
     }
+
+    verboseFor && console.log(aliasIds)
+
+    for (const aliasId of aliasIds.values()) {
+      const count = aliasIdCounts.get(aliasId) || 0
+      aliasIdCounts.set(aliasId, count + 1)
+    }
   }
 
-  const aliasIdsFromNamePart = _.flow(
-    _.countBy(_.identity),
-    _.toPairs,
-    _.reject(_.flow(
-      _.last,
-      _.gt(2)
-    )),
-    _.map(_.first)
-  )(aliasIds)
-
-  // debug_log(aliasIdsFromFullName)
-  // debug_log(aliasIdsFromNamePart)
+  verboseFor && console.log(aliasIdCounts)
+
+  const aliasIdsFromNamePart = []
+
+  for (const [aliasId, count] of aliasIdCounts) {
+    const {length} = structs.aliasesMap.get(aliasId).words
+    if (count >= _.min([2, words.length, length])) {
+      aliasIdsFromNamePart.push(aliasId)
+    }
+  }
+
+  debug && debug_log(aliasIdsFromFullName)
+  debug && debug_log(aliasIdsFromNamePart)
 
   // Get the full record for each matched id
   const getIndividual = aliasId => {
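The scoring rework means phonetic disagreement now subtracts from a word's score instead of contributing nothing. A worked example with the default ratio of 0.1 and a threshold of 0.85 (the numbers are illustrative, the formulas are the ones above):

    // pre-filter: skip a word entry unless even a perfect phonetic match could reach threshold
    //   0.9 * stringScore + 0.1 < 0.85  =>  skip when stringScore < 0.833
    // per-alias score: finalScore = stringScore + 0.1 * phoneticScore, phoneticScore in {1, -1}
    //   stringScore 0.90, phonetics agree:    0.90 + 0.1 = 1.00  >= 0.85, word counts
    //   stringScore 0.90, phonetics disagree: 0.90 - 0.1 = 0.80  <  0.85, word rejected
    // an alias is then kept only when count >= min(2, candidate words, alias words)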
@@ -139,111 +139,29 @@ function processProfile (profileNode) {
   return individual
 }
 
-function promiseParseDocument (source) {
-  return new Promise((resolve, reject) => {
-    const stream = fs.createReadStream(source)
-    const xml = new XmlStream(stream)
+const parse = (source, callback) => {
+  const stream = fs.createReadStream(source)
+  const xml = new XmlStream(stream)
 
   xml.on('error', err => {
     xml.pause()
     const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
-    reject(new Error(message))
+    callback(new Error(message))
   })
 
   xml.collect('Alias')
   xml.collect('DocumentedName')
   xml.collect('DocumentedNamePart')
   xml.collect('Feature')
   xml.collect('MasterNamePartGroup')
 
-  const individuals = []
-
-  const collectResult = result => result && individuals.push(result)
-  xml.on('updateElement: Profile', _.flow(processProfile, collectResult))
+  const forwardProfile = profile => profile && callback(null, profile)
+
+  xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))
 
-  xml.on('end', _.wrap(resolve, individuals))
+  xml.on('end', () => {
+    callback(null, null)
   })
 }
 
-const mapAliases = _.curry((iteratee, individuals) => {
-  const mapIndividual = individual => {
-    const {id, aliases} = individual
-    return _.map(alias => iteratee(id, alias), aliases)
-  }
-  return _.flatMap(mapIndividual, individuals)
-})
-
-const getPhoneticEntries = (individualId, alias) => {
-  const pairPhoneticsWithValues = word => {
-    const {value, phonetics} = word
-    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
-    return _.map(makeEntry, phonetics)
-  }
-  return _.flatMap(pairPhoneticsWithValues, alias.words)
-}
-
-const producePhoneticMap = _.flow(
-  mapAliases(getPhoneticEntries),
-  _.flatten,
-  _.groupBy(_.get('phonetic')),
-  _.mapValues(_.flow(
-    _.map(_.get('aliasId')),
-    _.uniq
-  )),
-  _.toPairs,
-  entries => new Map(entries)
-)
-
-const getWords = (individualId, alias) => {
-  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
-  return _.map(pairWordsWithIds, alias.words)
-}
-
-const produceWordList = _.flow(
-  mapAliases(getWords),
-  _.flatten,
-  _.groupBy(_.get('value')),
-  _.mapValues(_.map(_.get('aliasId'))),
-  _.toPairs,
-  _.map(_.zipObject(['value', 'aliasIds']))
-)
-
-function parse (sources) {
-  return Promise.all(_.map(promiseParseDocument, sources))
-    .then(_.flow(
-      _.flatten,
-      _.compact,
-      _.uniqBy(_.get('id')),
-      individuals => {
-        const individualsMap = _.flow(
-          _.groupBy(_.get('id')),
-          _.mapValues(_.first),
-          _.toPairs,
-          entries => new Map(entries)
-        )(individuals)
-
-        const makeEntries = (individualId, alias) => [alias.id, alias]
-        const aliasesMap = new Map(mapAliases(makeEntries, individuals))
-
-        const getIdPairs = (individualId, alias) => [alias.id, individualId]
-        const idPairs = mapAliases(getIdPairs, individuals)
-        const aliasToIndividual = new Map(idPairs)
-
-        const phoneticMap = producePhoneticMap(individuals)
-        const wordList = produceWordList(individuals)
-
-        return {
-          individuals,
-          individualsMap,
-          aliasesMap,
-          aliasToIndividual,
-          phoneticMap,
-          wordList
-        }
-      }
-    ))
-}
 
 module.exports = {parse}
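parse() is now streaming and callback-based: the callback fires once per parsed profile, then with (null, null) when the stream ends, or with an Error on malformed input. This is how update.js below and the reworked parsing tests consume it. A minimal sketch, with an illustrative file path:

    const parser = require('./lib/ofac/parsing')

    const individuals = []
    parser.parse('/tmp/source.xml', (err, profile) => {
      if (err) throw err
      if (!profile) {
        // (null, null) marks the end of the stream
        console.log('parsed', individuals.length, 'individuals')
        return
      }
      individuals.push(profile)
    })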
lib/ofac/update.js (new file, 155 lines)
@@ -0,0 +1,155 @@
+const parser = require('./parsing')
+const https = require('https')
+const url = require('url')
+const fs = require('fs')
+const path = require('path')
+const util = require('util')
+const options = require('../options')
+const _ = require('lodash/fp')
+
+const OFAC_DATA_DIR = options.ofacDataDir
+const OFAC_SOURCES_DIR = path.join(OFAC_DATA_DIR, 'sources')
+const OFAC_SOURCES_FILE = path.join(OFAC_DATA_DIR, 'sources.json')
+const OFAC_ETAGS_FILE = path.join(OFAC_DATA_DIR, 'etags.json')
+const DOWNLOAD_DIR = path.resolve('/tmp')
+
+const readFile = util.promisify(fs.readFile)
+const writeFile = util.promisify(fs.writeFile)
+const rename = util.promisify(fs.rename)
+const unlink = util.promisify(fs.unlink)
+const remove = file => {
+  console.log("remove", file)
+  return unlink(file)
+}
+
+const promiseGetEtag = (source) => {
+  return new Promise((resolve, reject) => {
+    const {url: sourceUrl} = source
+    const parsed = url.parse(sourceUrl)
+    const requestOptions = {
+      hostname: parsed.hostname,
+      path: parsed.path,
+      method: 'HEAD'
+    }
+
+    const request = https.request(requestOptions, _.flow(
+      _.get(['headers', 'etag']),
+      resolve
+    ))
+
+    request.on('error', reject)
+
+    request.end()
+  })
+}
+
+const download = _.curry((dstDir, source) => {
+  console.log("download", source)
+  const {url: sourceUrl} = source
+  const fileName = path.basename(sourceUrl)
+  const dstFile = path.join(dstDir, fileName)
+  const file = fs.createWriteStream(dstFile)
+
+  return new Promise((resolve, reject) => {
+    const request = https.get(sourceUrl, response => {
+      response.pipe(file);
+      file.on('finish', () => file.close(() => resolve(dstFile)))
+    })
+
+    request.on('error', reject)
+  })
+})
+
+const parseToJson = srcFile => {
+  console.log("parseToJson", srcFile)
+
+  const dstFile = srcFile.replace(/\.xml$/, '.json')
+  const writeStream = fs.createWriteStream(dstFile)
+
+  return new Promise((resolve, reject) => {
+    parser.parse(srcFile, (err, profile) => {
+      console.log("callback", err, profile)
+
+      if (err) {
+        reject(err)
+        return
+      }
+
+      if (!profile) {
+        writeStream.end()
+        return
+      }
+
+      const json = JSON.stringify(profile)
+      writeStream.write(json + '\n', 'utf-8')
+    })
+
+    writeStream.on('error', reject)
+    writeStream.on('finish', () => resolve(dstFile))
+  })
+}
+
+const moveToSourcesDir = srcFile => {
+  console.log("moveToSourcesDir", srcFile)
+  const name = path.basename(srcFile)
+  const dstFile = path.join(OFAC_SOURCES_DIR, name)
+  return rename(srcFile, dstFile)
+}
+
+function update () {
+  const promiseOldEtags = readFile(OFAC_ETAGS_FILE, {encoding: 'utf-8'})
+    .then(json => JSON.parse(json) || {})
+
+  const promiseNewEtags = readFile(OFAC_SOURCES_FILE, {encoding: 'utf-8'})
+    .then(json => {
+      const obj = JSON.parse(json)
+      return obj ? obj.sources : []
+    })
+    .then(sources => Promise.all(_.map(promiseGetEtag, sources))
+      .then(etags => _.map(
+        ([source, etag]) => ({...source, etag}),
+        _.zip(sources, etags)
+      ))
+    )
+
+  return Promise.all([promiseOldEtags, promiseNewEtags])
+    .then(([oldEtags, newEtags]) => {
+      console.log("OLD", JSON.stringify(oldEtags, null, 4))
+      console.log("NEW", JSON.stringify(newEtags, null, 4))
+
+      const hasNotChanged = ({name, etag}) => oldEtags[name] === etag
+
+      const downloads = _.flow(
+        _.reject(hasNotChanged),
+        _.map(file => download(DOWNLOAD_DIR, file).then(parseToJson))
+      )(newEtags)
+
+      const oldFileNames = _.keys(oldEtags)
+      const newFileNames = _.map(_.get('name'), newEtags)
+      const missingFileNames = _.difference(oldFileNames, newFileNames)
+      const resolve = name => path.join(OFAC_SOURCES_DIR, name + '.json')
+      const missing = _.map(resolve, missingFileNames)
+
+      const etagsJson = _.flow(
+        _.map(source => [source.name, source.etag]),
+        _.fromPairs,
+        obj => JSON.stringify(obj, null, 4)
+      )(newEtags)
+
+      return Promise.all(downloads)
+        .then(parsed => {
+          console.log("finished", parsed)
+
+          const moves = _.map(moveToSourcesDir, parsed)
+          const deletions = _.map(remove, missing)
+          const updateEtags = writeFile(OFAC_ETAGS_FILE, etagsJson)
+
+          return Promise.all([updateEtags, ...moves, ...deletions])
+        })
+    })
+}
+
+module.exports = {update}
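update() re-downloads a source only when the ETag returned by a HEAD request differs from the one recorded on disk. The two bookkeeping files implied by the code would look roughly like this; only the shapes ({sources: [{name, url}]} and name-to-etag) come from the code, while the name, URL and ETag value are placeholders:

    // $OFAC_DATA_DIR/sources.json (read)
    {"sources": [{"name": "sdn_advanced", "url": "https://example.com/sdn_advanced.xml"}]}

    // $OFAC_DATA_DIR/etags.json (rewritten on every run)
    {"sdn_advanced": "\"1a2b3c\""}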
package-lock.json (generated, 20 changed lines)
@@ -5855,6 +5855,17 @@
       "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=",
       "dev": true
     },
+    "ndjson": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/ndjson/-/ndjson-1.5.0.tgz",
+      "integrity": "sha1-rmA7NrE0vOw0e0UkIrC/mNWDLsg=",
+      "requires": {
+        "json-stringify-safe": "5.0.1",
+        "minimist": "1.2.0",
+        "split2": "2.2.0",
+        "through2": "2.0.3"
+      }
+    },
     "negotiator": {
       "version": "0.6.1",
       "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz",
@@ -7738,6 +7749,14 @@
       "through": "2.3.8"
       }
     },
+    "split2": {
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/split2/-/split2-2.2.0.tgz",
+      "integrity": "sha512-RAb22TG39LhI31MbreBgIuKiIKhVsawfTgEGqKHTK87aG+ul/PB8Sqoi3I7kVdRWiCfrKxK3uo4/YUkpNvhPbw==",
+      "requires": {
+        "through2": "2.0.3"
+      }
+    },
     "sprintf-js": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
@@ -8003,7 +8022,6 @@
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz",
       "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=",
-      "dev": true,
       "requires": {
         "readable-stream": "2.3.3",
         "xtend": "4.0.1"
@@ -38,6 +38,7 @@
     "minimist": "^1.2.0",
     "moment": "^2.17.0",
     "morgan": "^1.8.2",
+    "ndjson": "^1.5.0",
     "node-hkdf-sync": "^1.0.0",
     "node-mailjet": "^3.2.1",
     "numeral": "^2.0.3",
@@ -9,6 +9,10 @@ let fullNames
 
 const rand = N => _.random(0, N - 1)
 
+const letters = _.range('a'.charCodeAt(0), 'z'.charCodeAt(0))
+const vowels = _.map(c => c.charCodeAt(0), ['a', 'e', 'i', 'o', 'u'])
+const consonants = _.difference(letters, vowels)
+
 const duplicate = (word, index) => {
   const c = word[index]
   return _.join('', [word.slice(0, index), c, c, word.slice(index + 1)])
@@ -25,10 +29,10 @@ const transpose = (word, index) => {
 }
 
 const alter = (word, index) => {
-  const c = word.charCodeAt(index)
-  const o = c - 'a'.charCodeAt(0)
-  const oo = (o + _.random(1, 26)) % 26
-  const cc = String.fromCharCode(oo + 'a'.charCodeAt(0))
+  const o = word.charCodeAt(index)
+  const collection = _.includes(o, vowels) ? vowels : consonants
+  const oo = _.sample(collection)
+  const cc = String.fromCharCode(oo)
   return _.join('', [word.slice(0, index), cc, word.slice(index + 1)])
 }
@@ -54,15 +58,14 @@ const misspellRandomly = word => {
 
 const shiftVowel = word => {
-  const vowels = 'aeiou'
-
   const indexedVowels = _.flow(
     _.get('length'),
     _.range(0),
     _.zip(_.split('', word)),
     _.map(_.zipObject(['letter', 'index'])),
     _.map(indexedLetter => {
-      const vowelIndex = _.indexOf(indexedLetter.letter, vowels)
+      const ord = indexedLetter.letter.charCodeAt(0)
+      const vowelIndex = _.indexOf(ord, vowels)
       return {...indexedLetter, vowelIndex}
     }),
     _.reject(_.flow(
@@ -78,7 +81,8 @@ const shiftVowel = word => {
       : indexedVowel.vowelIndex === 4 ? [ -1 ]
       : [ -1, +1 ]
     const offset = _.sample(options)
-    const replacement = vowels[indexedVowel.vowelIndex + offset]
+    const replacementOrd = vowels[indexedVowel.vowelIndex + offset]
+    const replacement = String.fromCharCode(replacementOrd)
 
     const index = indexedVowel.index
     return _.join('', [word.slice(0, index), replacement, word.slice(index + 1)])
@@ -110,6 +114,8 @@ const transcribe = word => {
   }
 }
 
+const threshold = 0.85
+
 describe('OFAC', function () {
   describe('Matching', function () {
@@ -130,7 +136,9 @@ describe('OFAC', function () {
     this.timeout(0)
 
     for (const fullName of fullNames) {
-      const matches = ofac.match({firstName: fullName}, null, 1)
+      const matches = ofac.match({firstName: fullName}, null, {
+        threshold,//: 1
+      })
       assert.ok(!_.isEmpty(matches))
     }
   })
@@ -145,7 +153,9 @@ describe('OFAC', function () {
       _.join(' ')
     )(fullName)
 
-    const matches = ofac.match({firstName: reversed}, null, 1)
+    const matches = ofac.match({firstName: reversed}, null, {
+      threshold,//: 1
+    })
     assert.ok(!_.isEmpty(matches))
   }
 })
@@ -162,13 +172,29 @@ describe('OFAC', function () {
      _.join(' ')
    )(fullName)
 
-    const matchesA = ofac.match({firstName: lightlyMisspelled}, null, 0.85)
-    if (matchesA.length === 0) { console.log(1, fullName, '|', lightlyMisspelled) }
-    assert.ok(matchesA.length > 0)
+    const matchesA = ofac.match({firstName: lightlyMisspelled}, null, {
+      threshold,//: 0.875
+    })
+    if (_.isEmpty(matchesA)) {
+      console.log(fullName)
+      ofac.match({firstName: lightlyMisspelled}, null, {
+        threshold,//: 0.875,
+        debug: true
+      })
+    }
+    assert.ok(!_.isEmpty(matchesA))
 
-    const matchesB = ofac.match({firstName: heavilyMisspelled}, null, 0.75)
-    if (matchesB.length === 0) { console.log(2, fullName, '|', heavilyMisspelled) }
-    assert.ok(matchesB.length > 0)
+    const matchesB = ofac.match({firstName: heavilyMisspelled}, null, {
+      threshold: threshold - 0.1,//: 0.75
+    })
+    if (_.isEmpty(matchesB)) {
+      console.log(fullName)
+      ofac.match({firstName: heavilyMisspelled}, null, {
+        threshold: threshold - 0.1,//: 0.75,
+        debug: true
+      })
+    }
+    assert.ok(!_.isEmpty(matchesB))
  }
 })
@@ -183,8 +209,16 @@ describe('OFAC', function () {
      continue
    }
 
-    const matches = ofac.match({firstName: transcribed}, null, 0.85)
-    if (matches.length === 0) { console.log(fullName, '|', transcribed) }
+    const matches = ofac.match({firstName: transcribed}, null, {
+      threshold,//: 0.85
+    })
+    if (_.isEmpty(matches)) {
+      console.log(fullName)
+      ofac.match({firstName: transcribed}, null, {
+        threshold,//: 0.85,
+        debug: true
+      })
+    }
    assert.ok(!_.isEmpty(matches))
  }
 })
@@ -204,7 +238,9 @@ describe('OFAC', function () {
  ))
 
  for (const fullName of fullNames) {
-    const matches = ofac.match({firstName: fullName}, dateString, 1)
+    const matches = ofac.match({firstName: fullName}, dateString, {
+      threshold,//: 1
+    })
    assert.ok(noMatchesWithBirthDates(matches))
  }
 })
@@ -228,16 +264,42 @@ describe('OFAC', function () {
 
  for (const lastName of lastNames.slice(0, 100)) {
    for (firstName of firstNamesMale.slice(0, 100)) {
-      const matches = ofac.match({firstName, lastName}, null, 0.85)
+      const matches = ofac.match({firstName, lastName}, null, {
+        threshold,//: 0.875
+      })
+      if (!_.isEmpty(matches)) {
+        ofac.match({firstName, lastName}, null, {
+          threshold,//: 0.875,
+          debug: true
+        })
+      }
      assert.ok(_.isEmpty(matches))
    }
 
    for (firstName of firstNamesFemale.slice(0, 100)) {
-      const matches = ofac.match({firstName, lastName}, null, 0.85)
+      const matches = ofac.match({firstName, lastName}, null, {
+        threshold,//: 0.875
+      })
+      if (!_.isEmpty(matches)) {
+        ofac.match({firstName, lastName}, null, {
+          threshold,//: 0.875,
+          debug: true
+        })
+      }
      assert.ok(_.isEmpty(matches))
    }
  }
 })
 
+it.skip('test', function () {
+  const firstName = 'hian chariapaporn'
+  ofac.match({firstName}, null, {
+    threshold,//: 0.875,
+    debug: true,
+    verboseFor: ['hiran', 'chariapaporn']
+  })
+})
+
 })
 })
@@ -134,148 +134,53 @@ const individualB = {id: '11', aliases: [{id: '15',
 }
 
+const parseIndividuals = source => {
+  const individuals = []
+
+  return new Promise((resolve, reject) => {
+    parser.parse(source, (err, profile) => {
+      if (err) {
+        reject(err)
+        return
+      }
+
+      if (!profile) {
+        resolve(individuals)
+        return
+      }
+
+      individuals.push(profile)
+    })
+  })
+}
+
 describe('OFAC', function () {
   describe('Parsing', function () {
 
     // To detect botched downloads
     it('should fail on malformed XML', function () {
       const xml = '<a><b></a>'
-      return makeDataFiles([xml]).then(parser.parse)
+      return makeDataFiles([xml])
+        .then(files => Promise.all(_.map(parseIndividuals, files)))
         .catch(error => {
           assert.ok(error instanceof Error)
-          return true
+          return 'failed'
         })
         .then(ret => {
-          assert.equal(ret, true)
+          assert.equal(ret, 'failed')
        })
    })
 
-    it('should return the expected structs', function () {
-      const xml = makeXml([individualA])
+    it('should return the expected individuals', function () {
+      const xml = makeXml([individualA, individualB])
 
-      return makeDataFiles([xml]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
-          assert.ok(Array.isArray(individuals))
-          assert.equal(individuals.length, 1)
-          assert.deepEqual(individuals[0], individualA)
-
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 1)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 1)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 3)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5'])
-
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 2)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
-        })
-    })
-
-    it('should be able to combine multiple sources', function () {
-      const xmlA = makeXml([individualA])
-      const xmlB = makeXml([individualB])
-
-      return makeDataFiles([xmlA, xmlB]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
+      return makeDataFiles([xml])
+        .then(files => Promise.all(_.map(parseIndividuals, files)))
+        .then(([individuals]) => {
           assert.ok(Array.isArray(individuals))
           assert.equal(individuals.length, 2)
-          assert.deepEqual(individuals[0], individualA)
-          assert.deepEqual(individuals[1], individualB)
-
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 2)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-          assert.ok(individualsMap.has('11'))
-          assert.deepEqual(individualsMap.get('11'), individualB)
-
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 2)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-          assert.ok(aliasToIndividual.has('15'))
-          assert.strictEqual(aliasToIndividual.get('15'), '11')
-
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 4)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5', '15'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5', '15'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5', '15'])
-
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 4)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5', '15']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
-          assert.deepEqual(wordList[2], {value: 'de', aliasIds: ['15']})
-          assert.deepEqual(wordList[3], {value: 'gaul', aliasIds: ['15']})
-        })
-    })
-
-    it('should remove duplicates from multiple sources', function () {
-      const xmlA1 = makeXml([individualA, individualA])
-      const xmlA2 = makeXml([individualA])
-
-      return makeDataFiles([xmlA1, xmlA2]).then(parser.parse)
-        .then(structs => {
-          const {individuals} = structs
-          assert.ok(Array.isArray(individuals))
-          assert.equal(individuals.length, 1)
-          assert.deepEqual(individuals[0], individualA)
-
-          const {individualsMap} = structs
-          assert.ok(individualsMap instanceof Map)
-          assert.equal(individualsMap.size, 1)
-          assert.ok(individualsMap.has('9'))
-          assert.deepEqual(individualsMap.get('9'), individualA)
-
-          const {aliasToIndividual} = structs
-          assert.ok(aliasToIndividual instanceof Map)
-          assert.equal(aliasToIndividual.size, 1)
-          assert.ok(aliasToIndividual.has('5'))
-          assert.strictEqual(aliasToIndividual.get('5'), '9')
-
-          const {phoneticMap} = structs
-          assert.ok(phoneticMap instanceof Map)
-          assert.equal(phoneticMap.size, 3)
-          assert.ok(phoneticMap.has('JN'))
-          assert.deepEqual(phoneticMap.get('JN'), ['5'])
-          assert.ok(phoneticMap.has('AN'))
-          assert.deepEqual(phoneticMap.get('AN'), ['5'])
-          assert.ok(phoneticMap.has('T'))
-          assert.deepEqual(phoneticMap.get('T'), ['5'])
-
-          const {wordList} = structs
-          assert.ok(Array.isArray(wordList))
-          assert.equal(wordList.length, 2)
-          assert.deepEqual(wordList[0], {value: 'john', aliasIds: ['5']})
-          assert.deepEqual(wordList[1], {value: 'doe', aliasIds: ['5']})
+          assert.deepEqual(individuals, [individualA, individualB])
        })
    })