lamassu-server/lib/ofac/parsing.js
2024-06-28 15:39:19 +01:00

161 lines
4.3 KiB
JavaScript

const fs = require('fs')
const XmlStream = require('xml-stream')
const nameUtils = require('./name-utils')
const logger = require('../logger')
const _ = require('lodash/fp')
// Identifiers used by the OFAC XML document. They are kept as strings because
// they are compared with `===` / used as Map keys against node attribute values.
// TODO(kostis): get these from the document itself
// Profile `PartySubTypeID` value accepted by processProfile.
const INDIVIDUAL = '4'
// Alias `AliasTypeID` value accepted by processAlias.
const NAME = '1403'
// Feature `FeatureTypeID` value accepted by processFeature.
const BIRTH_DATE = '8'
// DocumentedName `DocNameStatusID` value selected by isLatin.
const PRIMARY_LATIN = '1'
// Name-part type ids; each name-part group declares one via `NamePartTypeID`.
const LAST_NAME = '1520'
const FIRST_NAME = '1521'
const MIDDLE_NAME = '1522'
const MAIDEN_NAME = '1523'
const PATRONYMIC = '91708'
const MATRONYMIC = '91709'
const NICKNAME = '1528'
// Name-part type id -> property name reported as `partName` by
// processDocumentedNamePart. Type ids missing from this map yield `undefined`.
const partNames = new Map([
[LAST_NAME, 'lastName'],
[FIRST_NAME, 'firstName'],
[MIDDLE_NAME, 'middleName'],
[MAIDEN_NAME, 'maidenName'],
[PATRONYMIC, 'patronymic'],
[MATRONYMIC, 'matronymic'],
[NICKNAME, 'nickname']
])
// Word values that processAlias removes from an alias's `words` list.
// Currently empty: the only candidate is commented out.
const filteredWords = [
// 'al'
]
// group-id to type-id
/**
 * Reads the `[groupId, typeId]` pair out of a MasterNamePartGroup node.
 *
 * @param {object} groupNode - node whose `NamePartGroup.$` attributes carry
 *   `ID` and `NamePartTypeID`
 * @returns {Array} two-element entry `[ID, NamePartTypeID]`, suitable as a Map entry
 */
function processMasterNamePartGroup (groupNode) {
  const {ID, NamePartTypeID} = groupNode.NamePartGroup.$
  return [ID, NamePartTypeID]
}
/**
 * Turns one DocumentedNamePart node into a `{partName, value}` record.
 * Curried: pass `groupTypes` alone to obtain a mapper over name-part nodes.
 *
 * @param {Map} groupTypes - group id -> name-part type id lookup
 * @param {object} namePartNode - node carrying a `NamePartValue` child
 * @returns {{partName: (string|undefined), value: string}} `partName` is
 *   `undefined` when the type id is not listed in `partNames`; `value` is the
 *   node text passed through lodash `lowerCase`
 */
const processDocumentedNamePart = _.curry((groupTypes, namePartNode) => {
  const {$: attributes, $text: rawText} = namePartNode.NamePartValue
  // Resolve group id -> type id -> property name.
  const typeId = groupTypes.get(attributes.NamePartGroupID)
  return {
    partName: partNames.get(typeId),
    value: _.lowerCase(rawText)
  }
})
// Predicate over nodes: matches when the `$.DocNameStatusID` attribute equals PRIMARY_LATIN.
const isLatin = _.matchesProperty(['$', 'DocNameStatusID'], PRIMARY_LATIN)
/**
 * Builds the name record for a single Alias node.
 * Curried: pass `groupTypes` alone to obtain a mapper over alias nodes.
 *
 * Yields `undefined` when the alias is not of type NAME, is flagged
 * `LowQuality`, or has no DocumentedName accepted by `isLatin` (this last case
 * is additionally logged as an error).
 *
 * @param {Map} groupTypes - group id -> name-part type id lookup
 * @param {object} aliasNode - Alias node with `$` attributes and a
 *   `DocumentedName` collection
 * @returns {(object|undefined)} `{id, parts, fullName, words}` where `id` is
 *   the `ID` attribute of the selected DocumentedName
 */
const processAlias = _.curry((groupTypes, aliasNode) => {
  const attributes = aliasNode.$
  const isUsableName = attributes.AliasTypeID === NAME && attributes.LowQuality !== 'true'
  if (!isUsableName) return

  const latinNameNode = _.find(isLatin, aliasNode.DocumentedName)
  if (!latinNameNode) {
    logger.error(`Alias for Person with ID="${attributes.FixedRef}" has no latinized name`)
    return
  }

  const id = latinNameNode.$.ID
  const parts = _.map(processDocumentedNamePart(groupTypes), latinNameNode.DocumentedNamePart)
  const fullName = nameUtils.makeFullName(parts)

  // Drop every word whose `value` is listed in `filteredWords`.
  const isFiltered = entry => filteredWords.includes(_.get('value', entry))
  const toWords = nameUtils.makeWords
  const words = _.reject(isFiltered, toWords(fullName))

  return {id, parts, fullName, words}
})
// birth date
/**
 * Converts a date node into numeric year / month / day components.
 *
 * Every field is parsed as a base-10 integer. A field that is missing or does
 * not start with digits yields `NaN`.
 *
 * @param {object} dateNode - node exposing `Year`, `Month` and `Day` fields
 * @returns {{year: number, month: number, day: number}}
 */
function processDate (dateNode) {
  // Always pass the radix: without it a value such as "0x10" would be read as
  // hexadecimal instead of being treated as decimal text.
  const toInt = value => Number.parseInt(value, 10)
  return {
    year: toInt(dateNode.Year),
    month: toInt(dateNode.Month),
    day: toInt(dateNode.Day)
  }
}
/**
 * Extracts the birth-date period from a Feature node.
 *
 * Features whose `FeatureTypeID` is not BIRTH_DATE yield `undefined`.
 * Both the start and the end of a period can themselves be ranges; only the
 * outermost bounds (`Start.From` and `End.To`) are kept.
 *
 * @param {object} featureNode - Feature node with `$` attributes and a
 *   `FeatureVersion.DatePeriod` child
 * @returns {(object|undefined)} `{start, end}`, each the result of `processDate`
 */
function processFeature (featureNode) {
  const isBirthDate = featureNode.$.FeatureTypeID === BIRTH_DATE
  if (!isBirthDate) return

  const {Start, End} = featureNode.FeatureVersion.DatePeriod
  const earliest = Start.From
  const latest = End.To
  return {
    start: processDate(earliest),
    end: processDate(latest)
  }
}
// profile
/**
 * Converts a Profile node into an individual record.
 *
 * Yields `undefined` for profiles whose `PartySubTypeID` is not INDIVIDUAL and
 * for profiles left without any alias after `processAlias` has filtered them.
 *
 * @param {object} profileNode - Profile node with `$` attributes, an
 *   `Identity` child and an optional `Feature` collection
 * @returns {(object|undefined)} `{id, aliases, birthDatePeriods}`
 */
function processProfile (profileNode) {
  const {ID: id, PartySubTypeID: subType} = profileNode.$
  if (subType !== INDIVIDUAL) return

  const identity = profileNode.Identity

  // Lookup from name-part group id to name-part type id for this profile.
  const groupTypes = new Map(
    _.map(processMasterNamePartGroup, identity.NamePartGroups.MasterNamePartGroup)
  )

  // Aliases rejected by processAlias come back as undefined and are dropped.
  const aliases = _.compact(_.map(processAlias(groupTypes), identity.Alias))
  if (_.isEmpty(aliases)) return

  // Same for features that are not birth dates.
  const birthDatePeriods = _.compact(_.map(processFeature, profileNode.Feature))

  return {id, aliases, birthDatePeriods}
}
/**
 * Streams an OFAC XML data source and reports every individual found in it.
 *
 * `callback` is invoked Node-style as `(err, profile)`:
 *  - `(null, individual)` for each Profile element that `processProfile` accepts;
 *  - `(null, null)` once, when the document has been read to the end;
 *  - `(error)` once, when the file cannot be read or the XML cannot be parsed.
 * After the error or the end marker has been delivered nothing else is reported.
 *
 * @param {string} source - path of the XML file to read
 * @param {function} callback - receives profiles, the end marker or the error
 */
const parse = (source, callback) => {
  const stream = fs.createReadStream(source)
  const xml = new XmlStream(stream)

  // Set once the terminal callback (error or end) has been delivered, so a
  // consumer never sees an error followed by an end marker or stray profiles.
  let finished = false

  const fail = err => {
    if (finished) return
    finished = true
    const message = `Error while parsing OFAC data source file (${source}): ${err.message}`
    callback(new Error(message))
  }

  // Read failures (missing file, permissions, ...) are emitted on the file
  // stream itself and would otherwise be an unhandled 'error' event.
  // NOTE(review): xml-stream does not appear to forward stream errors - confirm;
  // the `finished` guard keeps this safe either way.
  stream.on('error', fail)

  xml.on('error', err => {
    xml.pause()
    // Nothing more will be consumed: release the underlying file descriptor.
    stream.destroy()
    fail(err)
  })

  // These elements can repeat, so always expose them as arrays.
  xml.collect('Alias')
  xml.collect('DocumentedName')
  xml.collect('DocumentedNamePart')
  xml.collect('Feature')
  xml.collect('MasterNamePartGroup')

  const forwardProfile = profile => {
    if (profile && !finished) callback(null, profile)
  }
  xml.on('updateElement: Profile', _.flow(processProfile, forwardProfile))

  xml.on('end', () => {
    if (finished) return
    finished = true
    callback(null, null)
  })
}
// Public API: only the streaming `parse` entry point is exported.
module.exports = {parse}