Parsing moved to downloading. Matching is being tweaked.

This commit is contained in:
Konstantin Mamalakis 2018-03-15 20:36:34 +02:00 committed by Josh Harvey
parent 793db0f449
commit b72f5549a5
10 changed files with 456 additions and 276 deletions

103
lib/ofac/loading.js Normal file
View file

@ -0,0 +1,103 @@
const fs = require('fs')
const ndjson = require('ndjson')
const _ = require('lodash/fp')
const mapAliases = _.curry((iteratee, individuals) => {
const mapIndividual = individual => {
const {id, aliases} = individual
return _.map(alias => iteratee(id, alias), aliases)
}
return _.flatMap(mapIndividual, individuals)
})
const getPhoneticEntries = (individualId, alias) => {
const pairPhoneticsWithValues = word => {
const {value, phonetics} = word
const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
return _.map(makeEntry, phonetics)
}
return _.flatMap(pairPhoneticsWithValues, alias.words)
}
const producePhoneticMap = _.flow(
mapAliases(getPhoneticEntries),
_.flatten,
_.groupBy(_.get('phonetic')),
_.mapValues(_.flow(
_.map(_.get('aliasId')),
_.uniq
)),
_.toPairs,
entries => new Map(entries)
)
const getWords = (individualId, alias) => {
const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
return _.map(pairWordsWithIds, alias.words)
}
const produceWordList = _.flow(
mapAliases(getWords),
_.flatten,
_.groupBy(_.get('value')),
_.mapValues(_.map(_.get('aliasId'))),
_.toPairs,
_.map(_.zipObject(['value', 'aliasIds']))
)
const parseSource = source => {
const individuals = []
const readStream = fs.createReadStream(source)
const jsonStream = readStream.pipe(ndjson.parse())
jsonStream.on('data', individual => {
individuals.push(individual)
})
return new Promise((resolve, reject) => {
jsonStream.on('error', reject)
jsonStream.on('end', () => {
resolve(individuals)
})
})
}
const load = sources => Promise.all(_.map(parseSource, sources))
.then(_.flow(
_.flatten,
_.compact,
_.uniqBy(_.get('id')),
individuals => {
const individualsMap = _.flow(
_.groupBy(_.get('id')),
_.mapValues(_.first),
_.toPairs,
entries => new Map(entries)
)(individuals)
const makeEntries = (individualId, alias) => [alias.id, alias]
const aliasesMap = new Map(mapAliases(makeEntries, individuals))
const getIdPairs = (individualId, alias) => [alias.id, individualId]
const idPairs = mapAliases(getIdPairs, individuals)
const aliasToIndividual = new Map(idPairs)
const phoneticMap = producePhoneticMap(individuals)
const wordList = produceWordList(individuals)
return {
individuals,
individualsMap,
aliasesMap,
aliasToIndividual,
phoneticMap,
wordList
}
}
))
module.exports = {load}