Parsing moved to downloading. Matching is being tweaked.
This commit is contained in:
parent
793db0f449
commit
b72f5549a5
10 changed files with 456 additions and 276 deletions
|
|
@ -139,111 +139,29 @@ function processProfile (profileNode) {
|
|||
return individual
|
||||
}
|
||||
|
||||
function promiseParseDocument (source) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const stream = fs.createReadStream(source)
|
||||
const xml = new XmlStream(stream)
|
||||
const parse = (source, callback) => {
|
||||
const stream = fs.createReadStream(source)
|
||||
const xml = new XmlStream(stream)
|
||||
|
||||
xml.on('error', err => {
|
||||
xml.pause()
|
||||
const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
|
||||
reject(new Error(message))
|
||||
})
|
||||
xml.on('error', err => {
|
||||
xml.pause()
|
||||
const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
|
||||
callback(new Error(message))
|
||||
})
|
||||
|
||||
xml.collect('Alias')
|
||||
xml.collect('DocumentedName')
|
||||
xml.collect('DocumentedNamePart')
|
||||
xml.collect('Feature')
|
||||
xml.collect('MasterNamePartGroup')
|
||||
xml.collect('Alias')
|
||||
xml.collect('DocumentedName')
|
||||
xml.collect('DocumentedNamePart')
|
||||
xml.collect('Feature')
|
||||
xml.collect('MasterNamePartGroup')
|
||||
|
||||
const individuals = []
|
||||
const forwardProfile = profile => profile && callback(null, profile)
|
||||
|
||||
const collectResult = result => result && individuals.push(result)
|
||||
xml.on('updateElement: Profile', _.flow(processProfile, collectResult))
|
||||
xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))
|
||||
|
||||
xml.on('end', _.wrap(resolve, individuals))
|
||||
xml.on('end', () => {
|
||||
callback(null, null)
|
||||
})
|
||||
}
|
||||
|
||||
// Calls `iteratee(individualId, alias)` for every alias of every individual
// and returns all results in a single list (per-individual result lists are
// concatenated). Curried: `mapAliases(iteratee)` yields a function that is
// still waiting for `individuals`.
const mapAliases = _.curry((iteratee, individuals) =>
  _.flatMap(
    ({id, aliases}) => _.map(alias => iteratee(id, alias), aliases),
    individuals
  )
)
|
||||
|
||||
|
||||
// Builds one `{value, phonetic, aliasId}` entry for every phonetic code of
// every word in `alias.words`. `individualId` is unused; it is only present so
// that this function matches the `(individualId, alias)` shape that
// `mapAliases` calls its iteratee with.
const getPhoneticEntries = (individualId, alias) => {
  const entriesForWord = ({value, phonetics}) =>
    _.map(phonetic => ({value, phonetic, aliasId: alias.id}), phonetics)

  return _.flatMap(entriesForWord, alias.words)
}
|
||||
|
||||
// Builds a Map from phonetic code to the list of unique alias ids that have a
// word with that phonetic code.
const producePhoneticMap = individuals => {
  // One {value, phonetic, aliasId} entry per (alias, word, phonetic) triple.
  const phoneticEntries = _.flatten(mapAliases(getPhoneticEntries, individuals))

  const entriesByPhonetic = _.groupBy(_.get('phonetic'), phoneticEntries)

  // Keep only the alias ids of each group, without repetitions.
  const aliasIdsByPhonetic = _.mapValues(
    group => _.uniq(_.map(_.get('aliasId'), group)),
    entriesByPhonetic
  )

  return new Map(_.toPairs(aliasIdsByPhonetic))
}
|
||||
|
||||
|
||||
// Pairs the value of every word in `alias.words` with the id of the alias.
// `individualId` is unused; it is only present so that this function matches
// the `(individualId, alias)` shape that `mapAliases` calls its iteratee with.
const getWords = (individualId, alias) =>
  _.map(({value}) => ({value, aliasId: alias.id}), alias.words)
|
||||
|
||||
// Builds a list of `{value, aliasIds}` records: one record per distinct word
// value, carrying the ids of the aliases in which that word appears.
// NOTE(review): unlike the phonetic map, alias ids are not de-duplicated here,
// so an alias that repeats a word contributes its id more than once — confirm
// whether that is intended.
const produceWordList = individuals => {
  const wordEntries = _.flatten(mapAliases(getWords, individuals))

  const entriesByValue = _.groupBy(_.get('value'), wordEntries)
  const aliasIdsByValue = _.mapValues(_.map(_.get('aliasId')), entriesByValue)

  return _.map(
    ([value, aliasIds]) => ({value, aliasIds}),
    _.toPairs(aliasIdsByValue)
  )
}
|
||||
|
||||
// Parses every source with `promiseParseDocument` and combines the results
// into lookup structures.
//
// Returns a promise that resolves to an object with:
//   individuals       - parsed individuals, falsy entries dropped, unique by id
//   individualsMap    - Map: individual id -> individual
//   aliasesMap        - Map: alias id -> alias
//   aliasToIndividual - Map: alias id -> id of the owning individual
//   phoneticMap       - result of producePhoneticMap(individuals)
//   wordList          - result of produceWordList(individuals)
//
// The promise rejects as soon as parsing of any source rejects.
function parse (sources) {
  const pendingDocuments = _.map(promiseParseDocument, sources)

  return Promise.all(pendingDocuments).then(parsedDocuments => {
    const individuals = _.uniqBy(
      _.get('id'),
      _.compact(_.flatten(parsedDocuments))
    )

    // Keys pass through a plain object here, so they end up as strings.
    const individualsById = _.mapValues(
      _.first,
      _.groupBy(_.get('id'), individuals)
    )
    const individualsMap = new Map(_.toPairs(individualsById))

    const toAliasEntry = (individualId, alias) => [alias.id, alias]
    const aliasesMap = new Map(mapAliases(toAliasEntry, individuals))

    const toOwnerEntry = (individualId, alias) => [alias.id, individualId]
    const aliasToIndividual = new Map(mapAliases(toOwnerEntry, individuals))

    const phoneticMap = producePhoneticMap(individuals)
    const wordList = produceWordList(individuals)

    return {
      individuals,
      individualsMap,
      aliasesMap,
      aliasToIndividual,
      phoneticMap,
      wordList
    }
  })
}
|
||||
|
||||
// Public interface of this module: only `parse` is exported.
module.exports = {parse}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue