Corrections and initial algorithm
This commit is contained in:
parent
fd659c88c6
commit
c4307cb749
5 changed files with 365 additions and 3 deletions
|
|
@ -11,6 +11,7 @@ KEY_DIR=$PWD/certs
|
|||
LAMASSU_CA_PATH=$PWD/Lamassu_CA.pem
|
||||
MIGRATE_STATE_PATH=$CONFIG_DIR/.migrate
|
||||
POSTGRES_PASS=postgres123
|
||||
OFAC_DATA_DIR=$CONFIG_DIR/ofac
|
||||
|
||||
mkdir -p $CERT_DIR
|
||||
mkdir -p $CONFIG_DIR >> $LOG_FILE 2>&1
|
||||
|
|
@ -64,6 +65,8 @@ openssl x509 \
|
|||
|
||||
rm /tmp/Lamassu_OP.csr.pem
|
||||
|
||||
mkdir -p $OFAC_DATA_DIR
|
||||
|
||||
cat <<EOF > $CONFIG_DIR/lamassu.json
|
||||
{
|
||||
"postgresql": "psql://postgres:$POSTGRES_PASS@localhost/lamassu",
|
||||
|
|
@ -75,7 +78,8 @@ cat <<EOF > $CONFIG_DIR/lamassu.json
|
|||
"logLevel": "debug",
|
||||
"lamassuCaPath": "$LAMASSU_CA_PATH",
|
||||
"lamassuServerPath": "$PWD",
|
||||
"migrateStatePath": "$MIGRATE_STATE_PATH"
|
||||
"migrateStatePath": "$MIGRATE_STATE_PATH",
|
||||
"ofacDataDir": "$OFAC_DATA_DIR"
|
||||
}
|
||||
EOF
|
||||
|
||||
|
|
|
|||
6
install
6
install
|
|
@ -16,6 +16,7 @@ SEEDS_DIR=$HOME/seeds
|
|||
SEED_FILE=$SEEDS_DIR/seed.txt
|
||||
BACKUP_DIR=/var/backups/postgresql
|
||||
BLOCKCHAIN_DIR=/mnt/blockchains
|
||||
OFAC_DATA_DIR=/var/lamassu/ofac
|
||||
|
||||
# Look into http://unix.stackexchange.com/questions/140734/configure-localtime-dpkg-reconfigure-tzdata
|
||||
|
||||
|
|
@ -145,6 +146,8 @@ openssl x509 \
|
|||
|
||||
rm /tmp/Lamassu_OP.csr.pem
|
||||
|
||||
mkdir -p $OFAC_DATA_DIR
|
||||
|
||||
cat <<EOF > $CONFIG_DIR/lamassu.json
|
||||
{
|
||||
"postgresql": "postgres://lamassu_pg:$POSTGRES_PW@localhost/lamassu",
|
||||
|
|
@ -156,7 +159,8 @@ cat <<EOF > $CONFIG_DIR/lamassu.json
|
|||
"logLevel": "info",
|
||||
"lamassuServerPath": "$NODE_MODULES/lamassu-server",
|
||||
"migrateStatePath": "$MIGRATE_STATE_PATH",
|
||||
"blockchainDir": "$BLOCKCHAIN_DIR"
|
||||
"blockchainDir": "$BLOCKCHAIN_DIR",
|
||||
"ofacDataDir": "$OFAC_DATA_DIR"
|
||||
}
|
||||
EOF
|
||||
|
||||
|
|
|
|||
266
lib/ofac/index.js
Normal file
266
lib/ofac/index.js
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const XmlStream = require('xml-stream')
|
||||
const jaroWinkler = require('talisman/metrics/distance/jaro-winkler')
|
||||
const metaphone = require('talisman/phonetics/metaphone')
|
||||
const options = require('../options')
|
||||
const logger = require('../logger')
|
||||
const _ = require('lodash/fp')
|
||||
|
||||
// PARSING
|
||||
|
||||
// Directory holding the OFAC data sources, read from the `ofacDataDir` option.
const OFAC_DATA_DIR = options.ofacDataDir

// TODO: get these from the document itself

// PartySubTypeID value a profile must carry to be processed.
const INDIVIDUAL = '4'
// AliasTypeID value an alias must carry to be processed.
const NAME = '1403'
// FeatureTypeID value a feature must carry to be read as a birth date.
const BIRTH_DATE = '8'

// NamePartTypeID values of the name parts this module knows about.
const LAST_NAME = '1520'
const FIRST_NAME = '1521'
const MIDDLE_NAME = '1522'
const MAIDEN_NAME = '1523'
const PATRONYMIC = '91708'
const MATRONYMIC = '91709'
const NICKNAME = '1528'

// Maps a name-part type id to the property name used in parsed name parts.
const partNames = new Map([
  [LAST_NAME, 'lastName'],
  [FIRST_NAME, 'firstName'],
  [MIDDLE_NAME, 'middleName'],
  [MAIDEN_NAME, 'maidenName'],
  [PATRONYMIC, 'patronymic'],
  [MATRONYMIC, 'matronymic'],
  [NICKNAME, 'nickname']
])

// TODO: get this from admin configuration
// Scores strictly above this value are treated as a match.
const SIMILARITY_THRESHOLD = 0.5

// TODO: remove
// Prints all of its arguments as one fully expanded, colorized array.
const debug_log = (...args) => {
  console.log(require('util').inspect(args, {depth: null, colors: true}))
}

// Flat list of parsed individuals used for matching; rebuilt by load().
let individuals = []
// Parsed individuals keyed by profile id; filled while documents are parsed.
const individualsById = new Map()
|
||||
|
||||
// group-id to type-id
|
||||
|
||||
// Turns a MasterNamePartGroup node into a [group id, name-part type id] entry,
// both read from the attributes of its NamePartGroup child.
function processMasterNamePartGroup (groupNode) {
  const {ID, NamePartTypeID} = groupNode.NamePartGroup.$
  return [ID, NamePartTypeID]
}
|
||||
|
||||
// name parts
|
||||
|
||||
// Builds a full name out of a {partName: value} object.
// The parts are ordered by part name, so that the same set of parts always
// yields the same string, and their values are joined with single spaces.
function makeFullNameFromParts (nameParts) {
  const pairsByPartName = _.sortBy(([partName]) => partName, _.toPairs(nameParts))
  return pairsByPartName.map(([, value]) => value).join(' ')
}
|
||||
|
||||
// Returns the metaphone code of the given name.
function makePhonetic (name) {
  const phoneticCode = metaphone(name)
  return phoneticCode
}
|
||||
|
||||
// Given the group-id to type-id map of a profile, returns a function that
// converts one DocumentedNamePart node into a single-entry object
// {partName: value}. The part name is found by resolving the node's group id
// to a type id, and the type id to a name through partNames.
function processDocumentedNamePart (groupTypes) {
  return ({NamePartValue: valueNode}) => {
    const typeId = groupTypes.get(valueNode.$.NamePartGroupID)
    return {[partNames.get(typeId)]: valueNode.$text}
  }
}
|
||||
|
||||
// Given the group-id to type-id map of a profile, returns a function that
// converts one Alias node into {parts, fullName, phoneticParts, phoneticFullName}.
// Aliases whose AliasTypeID is not NAME yield undefined.
function processAlias (groupTypes) {
  const toNamePart = processDocumentedNamePart(groupTypes)

  return aliasNode => {
    const isNameAlias = aliasNode.$.AliasTypeID === NAME
    if (!isNameAlias) return undefined

    // Merge the single-entry objects of all documented parts into one object.
    const documentedParts = aliasNode.DocumentedName.DocumentedNamePart
    const parts = _.assignAll(_.map(toNamePart, documentedParts))
    const fullName = makeFullNameFromParts(parts)

    return {
      parts,
      fullName,
      phoneticParts: _.mapValues(makePhonetic, parts),
      phoneticFullName: makePhonetic(fullName)
    }
  }
}
|
||||
|
||||
// birth date
|
||||
|
||||
// Converts a date node with Year, Month and Day fields into
// {year, month, day, date}. year, month and day are integers as written in
// the node (month is 1-based); date is the matching local-time Date object.
function processDate (dateNode) {
  // Always parse as decimal, so that a value is never read in another base.
  const year = parseInt(dateNode.Year, 10)
  const month = parseInt(dateNode.Month, 10)
  const day = parseInt(dateNode.Day, 10)

  // The Date constructor expects a 0-based month.
  const date = new Date(year, month - 1, day)

  return {year, month, day, date}
}
|
||||
|
||||
// Converts a Feature node into a birth-date period {start, end}, where both
// ends are objects as returned by processDate.
// Features whose FeatureTypeID is not BIRTH_DATE yield undefined.
function processFeature (featureNode) {
  const isBirthDate = featureNode.$.FeatureTypeID === BIRTH_DATE
  if (!isBirthDate) return undefined

  const {Start, End} = featureNode.FeatureVersion.DatePeriod

  // Both Start and End can themselves be ranges; that is ignored here.
  // Taking Start.From and End.To keeps the extremes of the date-period.
  return {
    start: processDate(Start.From),
    end: processDate(End.To)
  }
}
|
||||
|
||||
// profile
|
||||
|
||||
// Parses one Profile node and stores the result in individualsById under the
// profile's ID. Profiles whose PartySubTypeID is not INDIVIDUAL are ignored.
// The stored individual is {aliases, birthDatePeriods}:
// aliases holds the name aliases of the profile (see processAlias) and
// birthDatePeriods the birth-date features of the profile (see processFeature).
function processProfile (profileNode) {
  if (profileNode.$.PartySubTypeID !== INDIVIDUAL) return

  const id = profileNode.$.ID

  // Name parts reference a group id; the master groups of the identity tell
  // which name-part type each group id stands for.
  const identityNode = profileNode.Identity
  const groupTypesEntries = _.map(processMasterNamePartGroup, identityNode.NamePartGroups.MasterNamePartGroup)
  const groupTypes = new Map(groupTypesEntries)

  // Aliases and features of other types come back as undefined; drop them.
  const aliases = _.compact(_.map(processAlias(groupTypes), identityNode.Alias))
  const birthDatePeriods = _.compact(_.map(processFeature, profileNode.Feature))
  const individual = {aliases, birthDatePeriods}

  // The parsed individual is intentionally not printed here: dumping every
  // profile to the console floods the output while the data is being loaded.
  individualsById.set(id, individual)
}
|
||||
|
||||
// Parses one OFAC data source, given by its file name inside OFAC_DATA_DIR,
// and feeds every Profile element to processProfile.
// Returns a promise that resolves, without a value, once the document has been
// read to its end. Loading is best-effort: read and parse errors are logged
// and the promise still resolves, so that one bad source does not prevent the
// other sources from being used.
function promiseParseDocument (source) {
  return new Promise(resolve => {
    const fileName = path.join(OFAC_DATA_DIR, source)
    const stream = fs.createReadStream(fileName)

    // Read failures (unreadable file, directory entry, ...) are emitted on the
    // file stream itself. Without a listener Node throws them as an uncaught
    // exception and this promise would never settle. The XML parser is not
    // assumed to forward these errors, and it is not paused here since it may
    // not have started parsing yet.
    stream.on('error', error => {
      logger.error(`Error while reading the OFAC data source ${fileName}`)
      logger.error(error)
      resolve()
    })

    const xml = new XmlStream(stream)

    xml.on('error', error => {
      logger.error('Error while parsing the OFAC data sources.')
      logger.error(error)
      xml.pause()
      resolve()
    })

    // These elements can occur several times within their parent, so have the
    // parser gather them into arrays.
    xml.collect('Alias')
    xml.collect('DocumentedNamePart')
    xml.collect('Feature')
    xml.collect('MasterNamePartGroup')

    xml.on('updateElement: Profile', processProfile)

    xml.on('end', resolve)
  })
}
|
||||
|
||||
// (Re)loads all OFAC data sources found in OFAC_DATA_DIR.
// Always returns a promise that resolves, without a value, once loading is
// done. While loading, profiles are gathered in individualsById; the list used
// for matching is only replaced after every source has been processed.
// If the ofacDataDir option is missing, the problem is logged and nothing is
// loaded. Throws synchronously if the directory cannot be listed.
function load () {
  // NOTE: Not sure how you push code updates to existing clients. This problem
  // might pop up if new code is pushed, without re-doing setup.
  if (!OFAC_DATA_DIR) {
    logger.error('The ofacDataDir option has not been set in lamassu.json')
    // Still hand back a promise, so that callers can chain on the result no
    // matter which path was taken.
    return Promise.resolve()
  }

  individualsById.clear()

  const sources = fs.readdirSync(OFAC_DATA_DIR)
  const promises = _.map(promiseParseDocument, sources)

  return Promise.all(promises)
    .then(() => {
      individuals = Array.from(individualsById.values())
    })
}
|
||||
|
||||
// MATCHING
|
||||
|
||||
// birth date
|
||||
|
||||
// Given a target Date, returns a predicate over birth-date periods.
// A period is {start, end} as produced by processFeature, both ends carrying a
// `date` property. The predicate is true when the target date lies strictly
// between two years before the start and two years after the end of the period.
function isDateWithinTwoYearsOfPeriod (targetDate) {
  return function (period) {
    // Work on copies, so that the dates stored in the period are not modified.
    const startDate = new Date(period.start.date)
    startDate.setFullYear(startDate.getFullYear() - 2)

    const endDate = new Date(period.end.date)
    endDate.setFullYear(endDate.getFullYear() + 2)

    return (startDate < targetDate && targetDate < endDate)
  }
}
|
||||
|
||||
// Tells whether the date of the given date object falls within two years of
// at least one of the individual's birth-date periods.
function isBornWithinTwoYears (individual, dateObject) {
  const isCloseToPeriod = isDateWithinTwoYearsOfPeriod(dateObject.date)
  return _.some(isCloseToPeriod, individual.birthDatePeriods)
}
|
||||
|
||||
// exact match
|
||||
|
||||
// Given a candidate full name, returns a function that scores an alias by
// applying jaroWinkler to the alias' full name and the candidate.
function calcExactMatchScore (candidateFullName) {
  return alias => jaroWinkler(alias.fullName, candidateFullName)
}
|
||||
|
||||
// phonetic match
|
||||
|
||||
// Given the phonetic form of a candidate full name, returns a function that
// scores an alias by applying jaroWinkler to the alias' phonetic full name and
// the candidate.
function calcPhoneticMatchScore (candidatePhoneticFullName) {
  return alias => jaroWinkler(alias.phoneticFullName, candidatePhoneticFullName)
}
|
||||
|
||||
// algorithm
|
||||
|
||||
// NOTE: I'm still not 100% on what matching algorithm is the best choice.
|
||||
// I just experiment with a few metrics for now.
|
||||
|
||||
// Returns a predicate telling whether a parsed individual matches the given
// candidate. nameParts and phoneticParts are accepted but not used yet.
function doesMatch (nameParts, fullName, phoneticParts, phoneticFullName, birthDate) {
  const scoreExact = calcExactMatchScore(fullName)
  const scorePhonetic = calcPhoneticMatchScore(phoneticFullName)

  return individual => {
    // An individual with known birth-date periods is rejected outright unless
    // the candidate's birth date is within two years of at least one of them.
    const hasBirthDates = individual.birthDatePeriods.length
    if (hasBirthDates && !isBornWithinTwoYears(individual, birthDate)) return false

    // Similarity of the full names as written; with several aliases the best
    // score counts.
    const bestExactScore = _.max(_.map(scoreExact, individual.aliases))
    if (bestExactScore > SIMILARITY_THRESHOLD) return true

    // Similarity of the phonetic representations of the full names, as an
    // approximation of how alike the names sound; again the best score counts.
    const bestPhoneticScore = _.max(_.map(scorePhonetic, individual.aliases))
    return bestPhoneticScore > SIMILARITY_THRESHOLD
  }
}
|
||||
|
||||
// Tells whether the given person matches any of the loaded individuals.
// nameParts should be an object like {firstName: "John", lastName: "Doe", ...}
// birthDateString is the person's birth date in YYYYMMDD format.
// Returns true if at least one individual matches (see doesMatch).
function match (nameParts, birthDateString) {
  const fullName = makeFullNameFromParts(nameParts)

  const phoneticParts = _.mapValues(makePhonetic, nameParts)
  const phoneticFullName = makePhonetic(fullName)

  // Always parse as decimal, so that a value is never read in another base.
  const year = parseInt(birthDateString.slice(0, 4), 10)
  const month = parseInt(birthDateString.slice(4, 6), 10)
  const day = parseInt(birthDateString.slice(6, 8), 10)
  // The Date constructor expects a 0-based month.
  const date = new Date(year, month - 1, day)

  const birthDate = {year, month, day, date}

  return _.some(doesMatch(nameParts, fullName, phoneticParts, phoneticFullName, birthDate), individuals)
}
|
||||
|
||||
module.exports = {load, match}
|
||||
86
package-lock.json
generated
86
package-lock.json
generated
|
|
@ -4616,6 +4616,11 @@
|
|||
"core-util-is": "1.0.2"
|
||||
}
|
||||
},
|
||||
"html-entities": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-1.2.1.tgz",
|
||||
"integrity": "sha1-DfKTUfByEWNRXfueVUPl9u7VFi8="
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "3.9.2",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.9.2.tgz",
|
||||
|
|
@ -4727,6 +4732,14 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"iconv": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/iconv/-/iconv-2.3.0.tgz",
|
||||
"integrity": "sha512-eu9senpOZ7wzNweLX09jtrCdmEiie8Z5/iMxdIq3i7tkgg562EwKSU9yjXMz8ncaQ0B+845vbqAz+1kPFXzbtQ==",
|
||||
"requires": {
|
||||
"nan": "2.6.2"
|
||||
}
|
||||
},
|
||||
"iconv-lite": {
|
||||
"version": "0.4.15",
|
||||
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.15.tgz",
|
||||
|
|
@ -5715,6 +5728,14 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"mnemonist": {
|
||||
"version": "0.14.0",
|
||||
"resolved": "https://registry.npmjs.org/mnemonist/-/mnemonist-0.14.0.tgz",
|
||||
"integrity": "sha512-GosoNab9mShR9w6QJA+bFT9NC2fN+2smFuQ6dEvjt4myuMLFvy63qeFE0cJZ8DQQxmCrqKftTAA/8N686cRPiQ==",
|
||||
"requires": {
|
||||
"obliterator": "1.2.1"
|
||||
}
|
||||
},
|
||||
"moment": {
|
||||
"version": "2.18.1",
|
||||
"resolved": "https://registry.npmjs.org/moment/-/moment-2.18.1.tgz",
|
||||
|
|
@ -5792,6 +5813,15 @@
|
|||
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-2.0.3.tgz",
|
||||
"integrity": "sha1-DKZ+XmZ7jhNDVJyhcVOoFdC7/ao="
|
||||
},
|
||||
"node-expat": {
|
||||
"version": "2.3.16",
|
||||
"resolved": "https://registry.npmjs.org/node-expat/-/node-expat-2.3.16.tgz",
|
||||
"integrity": "sha512-e3HyQI0lk5CXyYQ4RsDYGiWdY5LJxNMlNCzo4/gwqY8lhYIeTf5VwGirGDa1EPrcZROmOR37wHuFVnoHmOWnOw==",
|
||||
"requires": {
|
||||
"bindings": "1.2.1",
|
||||
"nan": "2.6.2"
|
||||
}
|
||||
},
|
||||
"node-hkdf-sync": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-hkdf-sync/-/node-hkdf-sync-1.0.0.tgz",
|
||||
|
|
@ -5932,6 +5962,11 @@
|
|||
"is-extendable": "0.1.1"
|
||||
}
|
||||
},
|
||||
"obliterator": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/obliterator/-/obliterator-1.2.1.tgz",
|
||||
"integrity": "sha512-KMA0nZW3Z0UdG9Qtt5Ti8aFg8WvWHE8dKEL2/U5/+PfqyzpVyeLVXOrwhFskyrxnYjn936JZVm76rshSOYHgxQ=="
|
||||
},
|
||||
"observable-to-promise": {
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/observable-to-promise/-/observable-to-promise-0.5.0.tgz",
|
||||
|
|
@ -6176,6 +6211,11 @@
|
|||
"resolved": "https://registry.npmjs.org/packet-reader/-/packet-reader-0.3.1.tgz",
|
||||
"integrity": "sha1-zWLmCvjX/qinBexP+ZCHHEaHHyc="
|
||||
},
|
||||
"pandemonium": {
|
||||
"version": "1.4.1",
|
||||
"resolved": "https://registry.npmjs.org/pandemonium/-/pandemonium-1.4.1.tgz",
|
||||
"integrity": "sha512-KhwY9xv8tZGQE8L7FfzaTHrLH+JnarUsDlsa8mqfisjtU3J00P362IL52Ei/EhDp025yBDzPuES/zMdWvvAR5g=="
|
||||
},
|
||||
"parse-glob": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/parse-glob/-/parse-glob-3.0.4.tgz",
|
||||
|
|
@ -7784,6 +7824,19 @@
|
|||
"integrity": "sha1-lag9smGG1q9+ehjb2XYKL4bQj0A=",
|
||||
"dev": true
|
||||
},
|
||||
"talisman": {
|
||||
"version": "0.20.0",
|
||||
"resolved": "https://registry.npmjs.org/talisman/-/talisman-0.20.0.tgz",
|
||||
"integrity": "sha512-cIO2x+MaWrY/d1YxzzTnNI3Jkgmqy0NSpiEOhJ2ExvHNySlVodcGRK7mj+xRNDIUy9qSKWq2B8XTSt6THMLLbQ==",
|
||||
"requires": {
|
||||
"html-entities": "1.2.1",
|
||||
"lodash": "4.17.4",
|
||||
"long": "3.2.0",
|
||||
"mnemonist": "0.14.0",
|
||||
"obliterator": "1.2.1",
|
||||
"pandemonium": "1.4.1"
|
||||
}
|
||||
},
|
||||
"tar": {
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/tar/-/tar-2.2.1.tgz",
|
||||
|
|
@ -8417,6 +8470,39 @@
|
|||
"resolved": "https://registry.npmjs.org/xhr2/-/xhr2-0.1.4.tgz",
|
||||
"integrity": "sha1-f4dliEdxbbUCYyOBL4GMras4el8="
|
||||
},
|
||||
"xml-stream": {
|
||||
"version": "0.4.5",
|
||||
"resolved": "https://registry.npmjs.org/xml-stream/-/xml-stream-0.4.5.tgz",
|
||||
"integrity": "sha1-dFLYWzf5uIGnDv8M90oN8CCI7es=",
|
||||
"requires": {
|
||||
"iconv": "2.3.0",
|
||||
"node-expat": "2.3.16",
|
||||
"readable-stream": "1.1.14"
|
||||
},
|
||||
"dependencies": {
|
||||
"isarray": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
|
||||
"integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8="
|
||||
},
|
||||
"readable-stream": {
|
||||
"version": "1.1.14",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
|
||||
"integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=",
|
||||
"requires": {
|
||||
"core-util-is": "1.0.2",
|
||||
"inherits": "2.0.3",
|
||||
"isarray": "0.0.1",
|
||||
"string_decoder": "0.10.31"
|
||||
}
|
||||
},
|
||||
"string_decoder": {
|
||||
"version": "0.10.31",
|
||||
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
|
||||
"integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ="
|
||||
}
|
||||
}
|
||||
},
|
||||
"xmlbuilder": {
|
||||
"version": "8.2.2",
|
||||
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-8.2.2.tgz",
|
||||
|
|
|
|||
|
|
@ -50,11 +50,13 @@
|
|||
"serve-static": "^1.12.4",
|
||||
"socket.io": "^2.0.3",
|
||||
"socket.io-client": "^2.0.3",
|
||||
"talisman": "^0.20.0",
|
||||
"twilio": "^3.6.1",
|
||||
"uuid": "^3.1.0",
|
||||
"web3": "^0.19.1",
|
||||
"winston": "^2.3.0",
|
||||
"ws": "^3.1.0"
|
||||
"ws": "^3.1.0",
|
||||
"xml-stream": "^0.4.5"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue