Parsing moved to downloading. Matching is being tweaked.

This commit is contained in:
Konstantin Mamalakis 2018-03-15 20:36:34 +02:00 committed by Josh Harvey
parent 793db0f449
commit b72f5549a5
10 changed files with 456 additions and 276 deletions

View file

@ -139,111 +139,29 @@ function processProfile (profileNode) {
return individual
}
// NOTE(review): this span looks like a rendered diff in which two revisions of
// the same OFAC XML parser are interleaved line by line: a Promise-based
// `promiseParseDocument(source)` and a callback-based `parse(source, callback)`.
// Statements are duplicated and the braces do not balance as written, so
// reconcile against the real file before changing any code here.
function promiseParseDocument (source) {
return new Promise((resolve, reject) => {
const stream = fs.createReadStream(source)
const xml = new XmlStream(stream)
const parse = (source, callback) => {
const stream = fs.createReadStream(source)
const xml = new XmlStream(stream)
// Promise variant: on a parse error, pause the XML stream and reject with an
// Error whose message names the offending source file.
xml.on('error', err => {
xml.pause()
const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
reject(new Error(message))
})
// Callback variant: same error handling, but the Error is passed to `callback`.
xml.on('error', err => {
xml.pause()
const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
callback(new Error(message))
})
// Element names registered with xml.collect (presumably so repeated child
// elements are gathered into arrays — confirm against the xml-stream docs).
xml.collect('Alias')
xml.collect('DocumentedName')
xml.collect('DocumentedNamePart')
xml.collect('Feature')
xml.collect('MasterNamePartGroup')
xml.collect('Alias')
xml.collect('DocumentedName')
xml.collect('DocumentedNamePart')
xml.collect('Feature')
xml.collect('MasterNamePartGroup')
// Promise variant: accumulate every truthy processProfile result in `individuals`.
const individuals = []
// Callback variant: hand each truthy profile to the callback as (null, profile).
const forwardProfile = profile => profile && callback(null, profile)
const collectResult = result => result && individuals.push(result)
// Each completed Profile element is run through processProfile, then collected
// (Promise variant) or forwarded (callback variant).
xml.on('updateElement: Profile', _.flow(processProfile, collectResult))
xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))
// Promise variant: presumably resolves with `individuals` once the stream ends
// (relies on lodash/fp's argument order for `_.wrap` — TODO confirm).
xml.on('end', _.wrap(resolve, individuals))
// Callback variant: end of input is signalled by calling callback(null, null).
xml.on('end', () => {
callback(null, null)
})
}
/**
 * Curried helper that visits every alias of every individual.
 *
 * For each individual, `iteratee(individual.id, alias)` is called once per
 * entry in `individual.aliases`; the per-individual result lists are then
 * concatenated (one level of flattening) into a single array.
 *
 * @param {Function} iteratee - called as `iteratee(individualId, alias)`
 * @param {Array<Object>} individuals - objects carrying `id` and `aliases`
 * @returns {Array} the iteratee results, in individual/alias order
 */
const mapAliases = _.curry((iteratee, individuals) =>
  _.flatMap(
    ({id, aliases}) => _.map(alias => iteratee(id, alias), aliases),
    individuals
  )
)
/**
 * Expands one alias into a flat list of `{value, phonetic, aliasId}` entries:
 * one entry per phonetic code of each word in `alias.words`.
 *
 * @param {*} individualId - unused here; present so the function fits the
 *   `(individualId, alias)` iteratee shape
 * @param {Object} alias - object with an `id` and a `words` list whose items
 *   carry `value` and `phonetics`
 * @returns {Array<Object>} entries tagged with the alias id
 */
const getPhoneticEntries = (individualId, alias) => {
  // One entry per phonetic code of a single word.
  const entriesForWord = ({value, phonetics}) =>
    _.map(phonetic => ({value, phonetic, aliasId: alias.id}), phonetics)

  return _.flatMap(entriesForWord, alias.words)
}
/**
 * Builds a Map from phonetic code to the de-duplicated list of alias ids
 * that have a word with that code.
 *
 * @param {Array<Object>} individuals - individuals whose aliases are indexed
 * @returns {Map<string, Array>} phonetic code -> unique alias ids
 */
const producePhoneticMap = individuals => {
  // mapAliases yields one entry list per alias; merge them into a single list.
  const entries = _.flatten(mapAliases(getPhoneticEntries, individuals))
  const entriesByPhonetic = _.groupBy(_.get('phonetic'), entries)

  // Reduce each group to the distinct alias ids it mentions.
  const toUniqueAliasIds = group => _.uniq(_.map(_.get('aliasId'), group))
  const aliasIdsByPhonetic = _.mapValues(toUniqueAliasIds, entriesByPhonetic)

  return new Map(_.toPairs(aliasIdsByPhonetic))
}
/**
 * Lists the words of one alias as `{value, aliasId}` pairs.
 *
 * @param {*} individualId - unused here; present so the function fits the
 *   `(individualId, alias)` iteratee shape
 * @param {Object} alias - object with an `id` and a `words` list
 * @returns {Array<Object>} one `{value, aliasId}` object per word
 */
const getWords = (individualId, alias) =>
  _.map(({value}) => ({value, aliasId: alias.id}), alias.words)
/**
 * Builds the word list: one `{value, aliasIds}` record per distinct word
 * value, where `aliasIds` holds the id of every alias occurrence of that word
 * (ids are not de-duplicated here).
 *
 * @param {Array<Object>} individuals - individuals whose aliases are indexed
 * @returns {Array<{value: string, aliasIds: Array}>}
 */
const produceWordList = individuals => {
  // mapAliases yields one word list per alias; merge them into a single list.
  const words = _.flatten(mapAliases(getWords, individuals))
  const wordsByValue = _.groupBy(_.get('value'), words)
  const aliasIdsByValue = _.mapValues(_.map(_.get('aliasId')), wordsByValue)

  return _.map(
    ([value, aliasIds]) => ({value, aliasIds}),
    _.toPairs(aliasIdsByValue)
  )
}
/**
 * Parses every source with `promiseParseDocument`, merges the results and
 * derives the lookup structures built from them.
 *
 * The per-source results are flattened one level, falsy entries are dropped
 * and individuals are de-duplicated by `id` (first occurrence wins).
 *
 * @param {Array} sources - values handed one by one to `promiseParseDocument`
 * @returns {Promise<Object>} resolves to `{individuals, individualsMap,
 *   aliasesMap, aliasToIndividual, phoneticMap, wordList}`
 */
function parse (sources) {
  const byId = _.get('id')

  // Derive every index from the already de-duplicated individuals.
  const buildStructures = individuals => {
    const firstById = _.mapValues(_.first, _.groupBy(byId, individuals))
    const individualsMap = new Map(_.toPairs(firstById))

    const aliasesMap = new Map(
      mapAliases((individualId, alias) => [alias.id, alias], individuals)
    )
    const aliasToIndividual = new Map(
      mapAliases((individualId, alias) => [alias.id, individualId], individuals)
    )

    const phoneticMap = producePhoneticMap(individuals)
    const wordList = produceWordList(individuals)

    return {
      individuals,
      individualsMap,
      aliasesMap,
      aliasToIndividual,
      phoneticMap,
      wordList
    }
  }

  const pendingDocuments = _.map(promiseParseDocument, sources)

  return Promise.all(pendingDocuments).then(documents => {
    const individuals = _.uniqBy(byId, _.compact(_.flatten(documents)))
    return buildStructures(individuals)
  })
}
// Export `parse` as the only member of this module's public interface.
module.exports = {parse}