diff --git a/lib/ofac/data-parser.js b/lib/ofac/data-parser.js index 18014222..7bd74fe9 100644 --- a/lib/ofac/data-parser.js +++ b/lib/ofac/data-parser.js @@ -50,8 +50,7 @@ const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => { const typeId = groupTypes.get(groupId) const partName = partNames.get(typeId) const value = _.lowerCase(valueNode.$text) - const words = nameUtils.makeWords(value) - return {partName, value, words} + return {partName, value} }) const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN) @@ -68,12 +67,14 @@ const processAlias = _.curry((groupTypes, aliasNode) => { return } + const id = latinNameNode.$.ID const namePartNodes = latinNameNode.DocumentedNamePart const parts = _.map(getNamePart, namePartNodes) const fullName = nameUtils.makeFullName(parts) + const words = nameUtils.makeWords(fullName) - return {parts, fullName} + return {id, parts, fullName, words} }) // birth date @@ -121,10 +122,11 @@ function processProfile (profileNode) { if (_.isEmpty(aliases)) return const birthDatePeriods = mapCompact(processFeature, profileNode.Feature) - const individual = {aliases, birthDatePeriods} + const individual = {id, aliases, birthDatePeriods} debug_log(individual) - return [id, individual] + + return individual } function promiseParseDocument (source) { @@ -146,6 +148,7 @@ function promiseParseDocument (source) { xml.collect('MasterNamePartGroup') const individuals = [] + const collectResult = result => result && individuals.push(result) xml.on('updateElement: Profile', _.flow(processProfile, collectResult)) @@ -155,14 +158,79 @@ function promiseParseDocument (source) { const readdir = util.promisify(fs.readdir) +// const {id, individual, words} = result +// +// const individualEntry = [id, individual] +// individuals.push(individualEntry) +// +// const phoneticWithWord = pair => { +// const [word, phonetics] = pair +// const makeEntry = phonetic => ({word, phonetic, individualId: id}) +// return _.map(makeEntry, phonetics) +// } +// +// const phoneticEntries = _.flatten(_.map(phoneticWithWord, words)) +// allPhonetics.push(...phoneticEntries) + +const mapAliases = _.curry((iteratee, individuals) => { + const foreachIndividual = individual => { + const {id, aliases} = individual + return _.map(alias => iteratee(id, alias), aliases) + } + return _.flatten(_.map(foreachIndividual, individuals)) +}) + + +const getPhoneticEntries = (individualId, alias) => { + const pairPhoneticsWithWords = word => { + const {value, phonetics} = word + const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id}) + return _.map(makeEntry, phonetics) + } + return _.flatten(_.map(pairPhoneticsWithWords, alias.words)) +} + +const producePhoneticMap = _.flow( + mapAliases(getPhoneticEntries), + _.flatten, + _.groupBy(_.get('phonetic')), + _.mapValues(_.map(_.pick(['value', 'aliasId']))), + _.toPairs, + entries => new Map(entries) +) + + +const getWords = (individualId, alias) => { + const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id}) + return _.map(pairWordsWithIds, alias.words) +} + +const produceWordList = _.flow( + mapAliases(getWords), + _.flatten, + _.groupBy(_.get('value')), + _.mapValues(_.map(_.get('aliasId'))), + _.toPairs, + _.map(_.zipObject(['value', 'aliasIds'])) +) + const combineAndDedupe = _.flow( _.flatten, _.compact, - _.uniqBy(_.first), - _.map(_.last) + _.uniqBy(_.get('id')), + individuals => { + const getIdPairs = (individualId, alias) => [alias.id, individualId] + const idPairs = mapAliases(getIdPairs, individuals) + const aliasToIndividual = new Map(idPairs) + + const phoneticMap = producePhoneticMap(individuals) + const wordList = produceWordList(individuals) + + return {individuals, aliasToIndividual, phoneticMap, wordList} + } ) -function parseList () { +function produceStructs () { // NOTE: Not sure how you push code updates to existing clients. This problem // might pop up if new code is pushed, without re-doing setup. if (!OFAC_DATA_DIR) { @@ -175,4 +243,4 @@ function parseList () { .then(combineAndDedupe) } -module.exports = {parseList} +module.exports = {produceStructs} diff --git a/lib/ofac/index.js b/lib/ofac/index.js index 759b4674..d640155f 100644 --- a/lib/ofac/index.js +++ b/lib/ofac/index.js @@ -5,12 +5,12 @@ const _ = require('lodash/fp') const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove -let individuals = null +let structs = null function load () { - return dataParser.parseList() - .then(list => { - individuals = Array.from(list) + return dataParser.produceStructs() + .then(result => { + structs = result }) } @@ -111,7 +111,7 @@ function makeCompatible (nameParts) { } function match (nameParts, birthDateString) { - if (!individuals) { + if (!structs) { const message = 'The OFAC data sources have not been loaded yet.' return Promise.reject(new Error(message)) } @@ -132,7 +132,7 @@ function match (nameParts, birthDateString) { debug_log(candidate) const similarToCandidate = similarity(candidate) - const result = mapMax(similarToCandidate, individuals) + const result = mapMax(similarToCandidate, structs.individuals) console.log(result) return result } diff --git a/lib/ofac/name-utils.js b/lib/ofac/name-utils.js index 60e46e8d..f727fe0f 100644 --- a/lib/ofac/name-utils.js +++ b/lib/ofac/name-utils.js @@ -22,9 +22,9 @@ const makeFullName = _.flow( const makeWords = value => { const words = _.split(' ', value) - const phonetic = _.map(makePhonetic, words) - const props = _.zipAll([words, phonetic]) - return _.map(_.zipObject(['value', 'phonetic']), props) + const phonetics = _.map(makePhonetic, words) + const pairs = _.zipAll([words, phonetics]) + return _.map(_.zipObject(['value', 'phonetics']), pairs) } module.exports = {