diff --git a/lib/ofac/data-parser.js b/lib/ofac/data-parser.js index 2363e95f..bc77cca6 100644 --- a/lib/ofac/data-parser.js +++ b/lib/ofac/data-parser.js @@ -4,6 +4,7 @@ const util = require('util') const XmlStream = require('xml-stream') const nameUtils = require('./name-utils') const options = require('../options') +const logger = require('../logger') const _ = require('lodash/fp') const debug_log = require('./debug') // KOSTIS TODO: remove @@ -59,6 +60,13 @@ const processAlias = _.curry((groupTypes, aliasNode) => { const getNamePart = processDocumentedNamePart(groupTypes) const latinNameNode = _.find(isLatin, aliasNode.DocumentedName) + if (!latinNameNode) { + const id = aliasNode.$.FixedRef + const message = `Alias for Person with ID="${id}" has no latinized name` + logger.error(message) + return + } + const namePartNodes = latinNameNode.DocumentedNamePart const nameParts = _.map(getNamePart, namePartNodes) @@ -86,6 +94,7 @@ function processFeature (featureNode) { if (featureNode.$.FeatureTypeID !== BIRTH_DATE) return const datePeriodNode = featureNode.FeatureVersion.DatePeriod + // Ignore the fact that both Start and end can be a range. // By using Start.From and End.To we use the extremes of the date-period. const period = { @@ -107,9 +116,14 @@ function processProfile (profileNode) { const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup) const groupTypes = new Map(groupTypesEntries) + const mapCompact = _.flow(_.compact, _.map) + const getNameParts = processAlias(groupTypes) - const aliases = _.compact(_.map(getNameParts, identityNode.Alias)) - const birthDatePeriods = _.compact(_.map(processFeature, profileNode.Feature)) + const aliases = mapCompact(getNameParts, identityNode.Alias) + + if (_.isEmpty(aliases)) return + + const birthDatePeriods = mapCompact(processFeature, profileNode.Feature) const individual = {aliases, birthDatePeriods} debug_log(individual) @@ -135,7 +149,7 @@ function promiseParseDocument (source) { xml.collect('MasterNamePartGroup') const individuals = [] - const collectResult = result => individuals.push(result) + const collectResult = result => result && individuals.push(result) xml.on('updateElement: Profile', _.flow(processProfile, collectResult)) xml.on('end', _.wrap(resolve, individuals)) @@ -147,8 +161,8 @@ const readdir = util.promisify(fs.readdir) const combineAndDedupe = _.flow( _.flatten, _.compact, - _.uniqBy(_.nth(0)), - _.map(_.nth(1)) + _.uniqBy(_.first), + _.map(_.last) ) function parseList () { @@ -160,11 +174,7 @@ function parseList () { } return readdir(OFAC_DATA_DIR) - .then(sources => { - const promises = _.map(promiseParseDocument, sources) - - return Promise.all(promises) - }) + .then(sources => Promise.all(_.map(promiseParseDocument, sources))) .then(combineAndDedupe) } diff --git a/lib/ofac/index.js b/lib/ofac/index.js index 325de342..4e698ecf 100644 --- a/lib/ofac/index.js +++ b/lib/ofac/index.js @@ -5,35 +5,32 @@ const _ = require('lodash/fp') const debug_log = require('./debug') // KOSTIS TODO: remove -const individuals = [] +let individuals = [] function load () { - const newList = Array.from(dataParser.parseList()) - const oldLength = individuals.length - individuals.splice(0, oldLength, newList) + individuals = Array.from(dataParser.parseList()) } -load() - // MATCHING // birth date -function isDateWithinSomeYearsOfPeriod (period, date, years) { - const startDate = new Date(period.from.date) - const startYear = startDate.getFullYear() - startDate.setFullYear(startYear - years) +function isDateWithinSomeDaysOfPeriod (period, date, days) { + const inMillisecs = 24 * 60 * 60 * 1000 - const endDate = new Date(period.to.date) - const endYear = endDate.getFullYear() - endDate.setFullYear(endYear + years) + const startTime = period.from.date.getTime() - days * inMillisecs + const startDate = new Date(startTime) + + const endTime = period.to.date.getTime() + days * inMillisecs + const endDate = new Date(endTime) return (startDate < date && date < endDate) } -function isBornWithinTwoYears (individual, dateObject) { - const isWithinTwoYears = _.partialRight(isDateWithinSomeYearsOfPeriod, [dateObject.date, 2]) - return _.some(isWithinTwoYears, individual.birthDatePeriods) +function isBornTooLongSince (individual, dateObject, days) { + if (_.isEmpty(individual.birthDatePeriods)) return false + const isWithinSomeYears = _.partialRight(isDateWithinSomeDaysOfPeriod, [dateObject.date, days]) + return !_.some(isWithinSomeYears, individual.birthDatePeriods) } // exact match @@ -59,9 +56,10 @@ function calcPhoneticMatchScore (candidatePhoneticFullName) { const similarity = _.curry((candidate, individual) => { // Calculate if his birth date is within two years of the given date. - // If an individual has multiple birth-date periods, return wether any are + // If an individual has multiple birth-date periods, return whether any are // within two years. Reject individuals who don't match this criterion. - if (individual.birthDatePeriods.length && !isBornWithinTwoYears(individual, candidate.birthDate)) return 0 + const twoYears = 365 * 2 + if (!isBornTooLongSince(individual, candidate.birthDate, twoYears)) return 0 // Calculate the Jaro-Winkler similarity of the full name. // If an individual has multiple aliases, use the maximum score. diff --git a/lib/ofac/name-utils.js b/lib/ofac/name-utils.js index dfd86b5f..2da4683b 100644 --- a/lib/ofac/name-utils.js +++ b/lib/ofac/name-utils.js @@ -10,12 +10,12 @@ const phoneticMethod2 = _.flow(doubleMetaphone, _.uniq) const phoneticMethod3 = _.flow(_.split(' '), _.map(phoneticMethod2)) -// Combine name-parts in a standared order. +// Combine name-parts in a standard order. const fullNameFromParts = _.flow( _.toPairs, - _.sortBy(_.nth(0)), // sort by part name, - _.map(_.nth(1)), // get part value + _.sortBy(_.first), // sort by part name, + _.map(_.last), // get part value _.join(' ') )