| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98 |
1
1
1
1
1
1
1
1
1
1
1
1
| 'use strict';
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.default = fuzzySoundex;
var _deburr = require('lodash/deburr');
var _deburr2 = _interopRequireDefault(_deburr);
var _helpers = require('../helpers');
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
function _toConsumableArray(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } else { return Array.from(arr); } } /**
* Talisman phonetics/fuzzy-soundex
* =================================
*
* Implementation of the "Fuzzy Soundex" algorithm.
*
* [Reference]:
* http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf
*
* [Article]:
* Holmes, David and M. Catherine McCabe. "Improving Precision and Recall for
* Soundex Retrieval."
*/
/**
* Constants.
*/
var TRANSLATION = (0, _helpers.translation)('ABCDEFGHIJKLMNOPQRSTUVWXYZ', '0193017-07745501769301-7-9');
var SET1 = new Set(['CS', 'CZ', 'TS', 'TZ']),
SET2 = new Set(['HR', 'WR']),
SET3 = new Set(['KN', 'NG']),
SET4 = new Set('HWY');
var RULES = [[/CA/g, 'KA'], [/CC/g, 'KK'], [/CK/g, 'KK'], [/CE/g, 'SE'], [/CHL/g, 'KL'], [/CL/g, 'KL'], [/CHR/g, 'KR'], [/CR/g, 'KR'], [/CI/g, 'SI'], [/CO/g, 'KO'], [/CU/g, 'KU'], [/CY/g, 'SY'], [/DG/g, 'GG'], [/GH/g, 'HH'], [/MAC/g, 'MK'], [/MC/g, 'MK'], [/NST/g, 'NSS'], [/PF/g, 'FF'], [/PH/g, 'FF'], [/SCH/g, 'SSS'], [/TIO/g, 'SIO'], [/TIA/g, 'SIO'], [/TCH/g, 'CHH']];
/**
* Function taking a single name and computing its fuzzy Soundex code.
*
* @param {string} name - The name to process.
* @return {string} - The fuzzy Soundex code.
*
* @throws {Error} The function expects the name to be a string.
*/
function fuzzySoundex(name) {
if (typeof name !== 'string') throw Error('talisman/phonetics/fuzzy-soundex: the given name is not a string.');
if (!name) return '';
// Deburring the string & dropping any non-alphabetical character
name = (0, _deburr2.default)(name).toUpperCase().replace(/[^A-Z]/g, '');
// Applying some substitutions for beginnings
var firstTwoLetters = name.slice(0, 2),
rest = name.slice(2);
if (SET1.has(firstTwoLetters)) name = 'SS' + rest;else if (firstTwoLetters === 'GN') name = 'NN' + rest;else if (SET2.has(firstTwoLetters)) name = 'RR' + rest;else if (firstTwoLetters === 'HW') name = 'WW' + rest;else if (SET3.has(firstTwoLetters)) name = 'NN' + rest;
// Applying some substitutions for endings
var lastTwoLetters = name.slice(-2),
initial = name.slice(0, -2);
if (lastTwoLetters === 'CH') name = initial + 'KK';else if (lastTwoLetters === 'NT') name = initial + 'TT';else if (lastTwoLetters === 'RT') name = initial + 'RR';else if (name.slice(-3) === 'RDT') name = name.slice(0, -3) + 'RR';
// Applying the rules
for (var i = 0, l = RULES.length; i < l; i++) {
var _name;
name = (_name = name).replace.apply(_name, _toConsumableArray(RULES[i]));
} // Caching the first letter
var firstLetter = name[0];
// Translating
var code = '';
for (var _i = 0, _l = name.length; _i < _l; _i++) {
code += TRANSLATION[name[_i]] || name[_i];
} // Removing hyphens
code = code.replace(/-/g, '');
// Squeezing the code
code = (0, _helpers.squeeze)(code);
// Dealing with some initials
if (SET4.has(code[0])) code = firstLetter + code;else code = firstLetter + code.slice(1);
// Dropping vowels
code = code.replace(/0/g, '');
return code;
}
module.exports = exports['default'];
|