Created new data structures for the upcoming matching
parent 620863d703
commit 2f8c798304
3 changed files with 86 additions and 18 deletions
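
Taken together, the parser no longer resolves with just a de-duplicated list of individuals; produceStructs() now resolves with one object holding several cross-referencing structures. A rough sketch of its shape, pieced together from the hunks below (the field names come from the diff, while the example values, id formats and phonetic codes are invented):

// Sketch only: shapes inferred from the diff below, example data invented.
const exampleStructs = {
  // de-duplicated output of processProfile, now carrying its own id
  individuals: [{
    id: '9001',
    aliases: [{
      id: '5001',
      parts: [{partName: 'firstName', value: 'john'}, {partName: 'lastName', value: 'doe'}],
      fullName: 'john doe',
      words: [
        {value: 'john', phonetics: ['JN', 'AN']},
        {value: 'doe', phonetics: ['T']}
      ]
    }],
    birthDatePeriods: []
  }],

  // alias id -> individual id (from getIdPairs in combineAndDedupe)
  aliasToIndividual: new Map([['5001', '9001']]),

  // phonetic code -> [{value, aliasId}] (from producePhoneticMap)
  phoneticMap: new Map([
    ['JN', [{value: 'john', aliasId: '5001'}]],
    ['AN', [{value: 'john', aliasId: '5001'}]],
    ['T', [{value: 'doe', aliasId: '5001'}]]
  ]),

  // [{value, aliasIds}] (from produceWordList)
  wordList: [
    {value: 'john', aliasIds: ['5001']},
    {value: 'doe', aliasIds: ['5001']}
  ]
}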
@@ -50,8 +50,7 @@ const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => {
   const typeId = groupTypes.get(groupId)
   const partName = partNames.get(typeId)
   const value = _.lowerCase(valueNode.$text)
-  const words = nameUtils.makeWords(value)
-  return {partName, value, words}
+  return {partName, value}
 })
 
 const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN)
@@ -68,12 +67,14 @@ const processAlias = _.curry((groupTypes, aliasNode) => {
     return
   }
 
+  const id = latinNameNode.$.ID
   const namePartNodes = latinNameNode.DocumentedNamePart
   const parts = _.map(getNamePart, namePartNodes)
 
   const fullName = nameUtils.makeFullName(parts)
+  const words = nameUtils.makeWords(fullName)
 
-  return {parts, fullName}
+  return {id, parts, fullName, words}
 })
 
 // birth date
@@ -121,10 +122,11 @@ function processProfile (profileNode) {
   if (_.isEmpty(aliases)) return
 
   const birthDatePeriods = mapCompact(processFeature, profileNode.Feature)
-  const individual = {aliases, birthDatePeriods}
+  const individual = {id, aliases, birthDatePeriods}
 
   debug_log(individual)
-  return [id, individual]
+
+  return individual
 }
 
 function promiseParseDocument (source) {
@@ -146,6 +148,7 @@ function promiseParseDocument (source) {
     xml.collect('MasterNamePartGroup')
 
     const individuals = []
+
     const collectResult = result => result && individuals.push(result)
     xml.on('updateElement: Profile', _.flow(processProfile, collectResult))
 
@@ -155,14 +158,79 @@ function promiseParseDocument (source) {
 
 const readdir = util.promisify(fs.readdir)
 
+// const {id, individual, words} = result
+//
+// const individualEntry = [id, individual]
+// individuals.push(individualEntry)
+//
+// const phoneticWithWord = pair => {
+// const [word, phonetics] = pair
+// const makeEntry = phonetic => ({word, phonetic, individualId: id})
+// return _.map(makeEntry, phonetics)
+// }
+//
+// const phoneticEntries = _.flatten(_.map(phoneticWithWord, words))
+// allPhonetics.push(...phoneticEntries)
+
+const mapAliases = _.curry((iteratee, individuals) => {
+  const foreachIndividual = individual => {
+    const {id, aliases} = individual
+    return _.map(alias => iteratee(id, alias), aliases)
+  }
+  return _.flatten(_.map(foreachIndividual, individuals))
+})
+
+
+const getPhoneticEntries = (individualId, alias) => {
+  const pairPhoneticsWithWords = word => {
+    const {value, phonetics} = word
+    const makeEntry = phonetic => ({value, phonetic, aliasId: alias.id})
+    return _.map(makeEntry, phonetics)
+  }
+  return _.flatten(_.map(pairPhoneticsWithWords, alias.words))
+}
+
+const producePhoneticMap = _.flow(
+  mapAliases(getPhoneticEntries),
+  _.flatten,
+  _.groupBy(_.get('phonetic')),
+  _.mapValues(_.map(_.pick(['value', 'aliasId']))),
+  _.toPairs,
+  entries => new Map(entries)
+)
+
+
+const getWords = (individualId, alias) => {
+  const pairWordsWithIds = word => ({value: word.value, aliasId: alias.id})
+  return _.map(pairWordsWithIds, alias.words)
+}
+
+const produceWordList = _.flow(
+  mapAliases(getWords),
+  _.flatten,
+  _.groupBy(_.get('value')),
+  _.mapValues(_.map(_.get('aliasId'))),
+  _.toPairs,
+  _.map(_.zipObject(['value', 'aliasIds']))
+)
+
 const combineAndDedupe = _.flow(
   _.flatten,
   _.compact,
-  _.uniqBy(_.first),
-  _.map(_.last)
+  _.uniqBy(_.get('id')),
+  individuals => {
+    const getIdPairs = (individualId, alias) => [alias.id, individualId]
+    const idPairs = mapAliases(getIdPairs, individuals)
+    const aliasToIndividual = new Map(idPairs)
+
+    const phoneticMap = producePhoneticMap(individuals)
+    const wordList = produceWordList(individuals)
+
+    return {individuals, aliasToIndividual, phoneticMap, wordList}
+  }
 )
 
-function parseList () {
+function produceStructs () {
   // NOTE: Not sure how you push code updates to existing clients. This problem
   // might pop up if new code is pushed, without re-doing setup.
   if (!OFAC_DATA_DIR) {
@@ -175,4 +243,4 @@ function parseList () {
     .then(combineAndDedupe)
 }
 
-module.exports = {parseList}
+module.exports = {produceStructs}
@@ -5,12 +5,12 @@ const _ = require('lodash/fp')
 
 const debug_log = require('../pp')(__filename) // KOSTIS TODO: remove
 
-let individuals = null
+let structs = null
 
 function load () {
-  return dataParser.parseList()
-    .then(list => {
-      individuals = Array.from(list)
+  return dataParser.produceStructs()
+    .then(result => {
+      structs = result
     })
 }
 
@@ -111,7 +111,7 @@ function makeCompatible (nameParts) {
 }
 
 function match (nameParts, birthDateString) {
-  if (!individuals) {
+  if (!structs) {
     const message = 'The OFAC data sources have not been loaded yet.'
     return Promise.reject(new Error(message))
   }
@@ -132,7 +132,7 @@ function match (nameParts, birthDateString) {
   debug_log(candidate)
 
   const similarToCandidate = similarity(candidate)
-  const result = mapMax(similarToCandidate, individuals)
+  const result = mapMax(similarToCandidate, structs.individuals)
   console.log(result)
   return result
 }
@@ -22,9 +22,9 @@ const makeFullName = _.flow(
 
 const makeWords = value => {
   const words = _.split(' ', value)
-  const phonetic = _.map(makePhonetic, words)
-  const props = _.zipAll([words, phonetic])
-  return _.map(_.zipObject(['value', 'phonetic']), props)
+  const phonetics = _.map(makePhonetic, words)
+  const pairs = _.zipAll([words, phonetics])
+  return _.map(_.zipObject(['value', 'phonetics']), pairs)
 }
 
 module.exports = {
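
The matching itself is still to come, so nothing in this commit consumes the new structures yet. Purely to illustrate how they fit together, a hypothetical lookup over them could look like the sketch below; all names and the lookup strategy are made up, not taken from the repository:

// Hypothetical helper over the result of produceStructs(); illustration only.
const buildPhoneticLookup = structs => {
  const {individuals, aliasToIndividual, phoneticMap} = structs

  // individual id -> individual, so aliasToIndividual hits can be resolved
  const individualsById = new Map(individuals.map(ind => [ind.id, ind]))

  // all individuals having an alias word that shares the given phonetic code
  return code => {
    const entries = phoneticMap.get(code) || [] // [{value, aliasId}, ...]
    const ids = entries.map(entry => aliasToIndividual.get(entry.aliasId))
    return [...new Set(ids)].map(id => individualsById.get(id))
  }
}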