Parsing moved to downloading. Matching is being tweaked.
This commit is contained in:
parent
793db0f449
commit
b72f5549a5
10 changed files with 456 additions and 276 deletions
103
lib/ofac/loading.js
Normal file
103
lib/ofac/loading.js
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
const fs = require('fs')
|
||||
const ndjson = require('ndjson')
|
||||
const _ = require('lodash/fp')
|
||||
|
||||
|
||||
const mapAliases = _.curry((iteratee, individuals) => {
|
||||
const mapIndividual = individual => {
|
||||
const {id, aliases} = individual
|
||||
return _.map(alias => iteratee(id, alias), aliases)
|
||||
}
|
||||
return _.flatMap(mapIndividual, individuals)
|
||||
})
|
||||
|
||||
|
||||
const getPhoneticEntries = (individualId, alias) => {
|
||||
const pairPhoneticsWithValues = word => {
|
||||
const {value, phonetics} = word
|
||||
const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
|
||||
return _.map(makeEntry, phonetics)
|
||||
}
|
||||
return _.flatMap(pairPhoneticsWithValues, alias.words)
|
||||
}
|
||||
|
||||
const producePhoneticMap = _.flow(
|
||||
mapAliases(getPhoneticEntries),
|
||||
_.flatten,
|
||||
_.groupBy(_.get('phonetic')),
|
||||
_.mapValues(_.flow(
|
||||
_.map(_.get('aliasId')),
|
||||
_.uniq
|
||||
)),
|
||||
_.toPairs,
|
||||
entries => new Map(entries)
|
||||
)
|
||||
|
||||
|
||||
const getWords = (individualId, alias) => {
|
||||
const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
|
||||
return _.map(pairWordsWithIds, alias.words)
|
||||
}
|
||||
|
||||
const produceWordList = _.flow(
|
||||
mapAliases(getWords),
|
||||
_.flatten,
|
||||
_.groupBy(_.get('value')),
|
||||
_.mapValues(_.map(_.get('aliasId'))),
|
||||
_.toPairs,
|
||||
_.map(_.zipObject(['value', 'aliasIds']))
|
||||
)
|
||||
|
||||
const parseSource = source => {
|
||||
const individuals = []
|
||||
|
||||
const readStream = fs.createReadStream(source)
|
||||
const jsonStream = readStream.pipe(ndjson.parse())
|
||||
jsonStream.on('data', individual => {
|
||||
individuals.push(individual)
|
||||
})
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
jsonStream.on('error', reject)
|
||||
jsonStream.on('end', () => {
|
||||
resolve(individuals)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
const load = sources => Promise.all(_.map(parseSource, sources))
|
||||
.then(_.flow(
|
||||
_.flatten,
|
||||
_.compact,
|
||||
_.uniqBy(_.get('id')),
|
||||
individuals => {
|
||||
|
||||
const individualsMap = _.flow(
|
||||
_.groupBy(_.get('id')),
|
||||
_.mapValues(_.first),
|
||||
_.toPairs,
|
||||
entries => new Map(entries)
|
||||
)(individuals)
|
||||
|
||||
const makeEntries = (individualId, alias) => [alias.id, alias]
|
||||
const aliasesMap = new Map(mapAliases(makeEntries, individuals))
|
||||
|
||||
const getIdPairs = (individualId, alias) => [alias.id, individualId]
|
||||
const idPairs = mapAliases(getIdPairs, individuals)
|
||||
const aliasToIndividual = new Map(idPairs)
|
||||
|
||||
const phoneticMap = producePhoneticMap(individuals)
|
||||
const wordList = produceWordList(individuals)
|
||||
|
||||
return {
|
||||
individuals,
|
||||
individualsMap,
|
||||
aliasesMap,
|
||||
aliasToIndividual,
|
||||
phoneticMap,
|
||||
wordList
|
||||
}
|
||||
}
|
||||
))
|
||||
|
||||
module.exports = {load}
|
||||
Loading…
Add table
Add a link
Reference in a new issue