Skip to content

Commit

Permalink
added command to infer phone code
Browse files Browse the repository at this point in the history
added command to infer phone code
  • Loading branch information
namsor committed Dec 15, 2020
1 parent 34fa47e commit 9d3d280
Showing 1 changed file with 84 additions and 22 deletions.
106 changes: 84 additions & 22 deletions src/main/java/com/namsor/tools/NamSorTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@

import com.namsor.sdk2.api.AdminApi;
import com.namsor.sdk2.api.PersonalApi;
import com.namsor.sdk2.api.SocialApi;
import com.namsor.sdk2.invoke.ApiClient;
import com.namsor.sdk2.invoke.ApiException;
import com.namsor.sdk2.model.BatchFirstLastNameDiasporaedOut;
import com.namsor.sdk2.model.BatchFirstLastNameGenderedOut;
import com.namsor.sdk2.model.BatchFirstLastNameGeoIn;
import com.namsor.sdk2.model.BatchFirstLastNameIn;
import com.namsor.sdk2.model.BatchFirstLastNameOriginedOut;
import com.namsor.sdk2.model.BatchFirstLastNamePhoneCodedOut;
import com.namsor.sdk2.model.BatchFirstLastNamePhoneNumberIn;
import com.namsor.sdk2.model.BatchFirstLastNameUSRaceEthnicityOut;
import com.namsor.sdk2.model.BatchPersonalNameGenderedOut;
import com.namsor.sdk2.model.BatchPersonalNameGeoIn;
Expand All @@ -25,6 +28,8 @@
import com.namsor.sdk2.model.FirstLastNameGeoIn;
import com.namsor.sdk2.model.FirstLastNameIn;
import com.namsor.sdk2.model.FirstLastNameOriginedOut;
import com.namsor.sdk2.model.FirstLastNamePhoneCodedOut;
import com.namsor.sdk2.model.FirstLastNamePhoneNumberIn;
import com.namsor.sdk2.model.FirstLastNameUSRaceEthnicityOut;
import com.namsor.sdk2.model.PersonalNameGenderedOut;
import com.namsor.sdk2.model.PersonalNameGeoIn;
Expand Down Expand Up @@ -81,26 +86,29 @@ public class NamSorTools {
private static final String INPUT_DATA_FORMAT_FNLNGEO = "fnlngeo";
private static final String INPUT_DATA_FORMAT_FULLNAME = "name";
private static final String INPUT_DATA_FORMAT_FULLNAMEGEO = "namegeo";
private static final String INPUT_DATA_FORMAT_FNLNPHONE = "fnlnphone";

private static final String[] INPUT_DATA_FORMAT = {
INPUT_DATA_FORMAT_FNLN,
INPUT_DATA_FORMAT_FNLNGEO,
INPUT_DATA_FORMAT_FULLNAME,
INPUT_DATA_FORMAT_FULLNAMEGEO
INPUT_DATA_FORMAT_FULLNAMEGEO,
INPUT_DATA_FORMAT_FNLNPHONE
};

private static final String[][] INPUT_DATA_FORMAT_HEADER = {
{"firstName", "lastName"},
{"firstName", "lastName", "countryIso2"},
{"fullName"},
{"fullName", "countryIso2"}
};
{"fullName", "countryIso2"},
{"firstName", "lastName", "phone"},};

// Names of the services selectable through the "service" command-line option.
// They are compared against the requested service with String.equals, so the
// match is case-sensitive.
private static final String SERVICE_NAME_PARSE = "parse";
private static final String SERVICE_NAME_GENDER = "gender";
private static final String SERVICE_NAME_ORIGIN = "origin";
private static final String SERVICE_NAME_COUNTRY = "country";
private static final String SERVICE_NAME_DIASPORA = "diaspora";
// Phone code inference; requests for this service go through the social API client.
private static final String SERVICE_NAME_PHONECODE = "phonecode";
private static final String SERVICE_NAME_USRACEETHNICITY = "usraceethnicity";

private static final String[] SERVICES = {
Expand All @@ -109,7 +117,8 @@ public class NamSorTools {
SERVICE_NAME_ORIGIN,
SERVICE_NAME_COUNTRY,
SERVICE_NAME_DIASPORA,
SERVICE_NAME_USRACEETHNICITY
SERVICE_NAME_USRACEETHNICITY,
SERVICE_NAME_PHONECODE
};

private static final String[] OUTPUT_DATA_PARSE_HEADER = {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore", "script"};
Expand All @@ -118,17 +127,21 @@ public class NamSorTools {
private static final String[] OUTPUT_DATA_COUNTRY_HEADER = {"country", "countryAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryScore", "script"};
private static final String[] OUTPUT_DATA_DIASPORA_HEADER = {"ethnicity", "ethnicityAlt", "ethnicityScore", "script"};
private static final String[] OUTPUT_DATA_USRACEETHNICITY_HEADER = {"raceEthnicity", "raceEthnicityAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "raceEthnicityScore", "script"};
private static final String[] OUTPUT_DATA_PHONECODE_HEADER = {"internationalPhoneNumberVerified", "phoneCountryIso2Verified", "phoneCountryCode", "phoneCountryCodeAlt", "phoneCountryIso2", "phoneCountryIso2Alt", "originCountryIso2", "originCountryIso2Alt", "verified", "score", "script"};

private static final String[][] OUTPUT_DATA_HEADERS = {
OUTPUT_DATA_PARSE_HEADER,
OUTPUT_DATA_GENDER_HEADER,
OUTPUT_DATA_ORIGIN_HEADER,
OUTPUT_DATA_COUNTRY_HEADER,
OUTPUT_DATA_DIASPORA_HEADER,
OUTPUT_DATA_USRACEETHNICITY_HEADER
OUTPUT_DATA_USRACEETHNICITY_HEADER,
OUTPUT_DATA_PHONECODE_HEADER
};

// Command-line options this tool instance was constructed with.
private final CommandLine commandLineOptions;
// API clients; all three are created in the constructor from the same ApiClient.
private final PersonalApi api;
// Used for the phone code batch call.
private final SocialApi socialApi;
private final AdminApi adminApi;
// Timeout handed to the ApiClient (e.g. setWriteTimeout); presumably milliseconds - TODO confirm.
private final int TIMEOUT = 30000;
// True when the "uid" command-line option is present.
private final boolean withUID;
Expand All @@ -149,13 +162,14 @@ public NamSorTools(CommandLine commandLineOptions) {
client.setWriteTimeout(TIMEOUT);
client.setApiKey(apiKey);
String basePath = commandLineOptions.getOptionValue("basePath");
if (basePath != null && ! basePath.isEmpty()) {
Logger.getLogger(NamSorTools.class.getName()).info("Overriding basePath="+basePath);
if (basePath != null && !basePath.isEmpty()) {
Logger.getLogger(NamSorTools.class.getName()).info("Overriding basePath=" + basePath);
client.setBasePath(basePath);
}
}
//client.setDebugging(false);
api = new PersonalApi(client);
adminApi = new AdminApi(client);
socialApi = new SocialApi(client);

withUID = commandLineOptions.hasOption("uid");
recover = commandLineOptions.hasOption("recover");
Expand Down Expand Up @@ -185,7 +199,7 @@ private static String bytesToHex(byte[] hash) {
}

public String digest(String inClear) {
if (getDigest() == null || inClear == null || inClear.isEmpty() ) {
if (getDigest() == null || inClear == null || inClear.isEmpty()) {
return inClear;
} else {
final byte[] hashbytes = getDigest().digest(
Expand Down Expand Up @@ -225,7 +239,7 @@ public static void main(String[] args) {
.longOpt("basePath")
.required(false)
.build();

Option inputFile = Option.builder("i").argName("inputFile")
.hasArg()
.desc("input file name")
Expand Down Expand Up @@ -267,7 +281,7 @@ public static void main(String[] args) {
.longOpt("skip")
.required(false)
.build();

Option inputDataFormat = Option.builder("f").argName("inputDataFormat")
.hasArg()
.desc("input data format : first name, last name (fnln) / first name, last name, geo country iso2 (fnlngeo) / full name (name) / full name, geo country iso2 (namegeo) ")
Expand Down Expand Up @@ -304,7 +318,7 @@ public static void main(String[] args) {

Option service = Option.builder("service").argName("service")
.hasArg(true)
.desc("service : parse / gender / origin / country / diaspora / usraceethnicity")
.desc("service : parse / gender / origin / country / diaspora / usraceethnicity / phoneCode")
.longOpt("endpoint")
.required(true)
.build();
Expand Down Expand Up @@ -393,7 +407,7 @@ private void run() throws NamSorToolException {
}
String outputFileName = getCommandLineOptions().getOptionValue("outputFile");
if (outputFileName == null || outputFileName.isEmpty()) {
outputFileName = inputFileName + "." + service + (digest!=null?".digest":"") + ".namsor";
outputFileName = inputFileName + "." + service + (digest != null ? ".digest" : "") + ".namsor";
Logger.getLogger(getClass().getName()).info("Outputing to " + outputFileName);
}
File outputFile = new File(outputFileName);
Expand Down Expand Up @@ -505,7 +519,7 @@ private void process(String service, BufferedReader reader, Writer writer, Strin
}
String[] lineData = line.split("\\|");
if (lineData.length != dataLenExpected) {
if( skipErrors ) {
if (skipErrors) {
Logger.getLogger(getClass().getName()).warning("Line " + lineId + ", expected input with format : " + dataFormatExpected.toString() + " line = " + line);
lineId++;
line = reader.readLine();
Expand Down Expand Up @@ -562,6 +576,17 @@ private void process(String service, BufferedReader reader, Writer writer, Strin
personalNameGeoIn.setName(fullName);
personalNameGeoIn.setCountryIso2(countryIso2);
personalNamesGeoIn.put(uid, personalNameGeoIn);
} else if (inputDataFormat.equals(INPUT_DATA_FORMAT_FNLNPHONE)) {
String firstName = lineData[col++];
String lastName = lineData[col++];
String phoneNumber = lineData[col++];

FirstLastNamePhoneNumberIn firstLastNamePhoneNumberIn = new FirstLastNamePhoneNumberIn();
firstLastNamePhoneNumberIn.setId(uid);
firstLastNamePhoneNumberIn.setFirstName(firstName);
firstLastNamePhoneNumberIn.setLastName(lastName);
firstLastNamePhoneNumberIn.setPhoneNumber(phoneNumber);
firstLastNamesPhoneNumberIn.put(uid, firstLastNamePhoneNumberIn);
}
processData(service, outputHeaders, writer, false, softwareNameAndVersion);
}
Expand Down Expand Up @@ -655,18 +680,18 @@ private Map<String, PersonalNameGeoOut> processCountry(List<PersonalNameIn> name
}
return result;
}

private Map<String, PersonalNameGeoOut> processCountryAdapted(List<FirstLastNameIn> names_) throws ApiException, IOException {
List<PersonalNameIn> names = new ArrayList();
for (FirstLastNameIn name : names_) {
PersonalNameIn adapted = new PersonalNameIn();
adapted.setId(name.getId());
adapted.setName(name.getFirstName()+" "+name.getLastName());
names.add(adapted);
PersonalNameIn adapted = new PersonalNameIn();
adapted.setId(name.getId());
adapted.setName(name.getFirstName() + " " + name.getLastName());
names.add(adapted);
}
return processCountry(names);
}
}

private Map<String, PersonalNameGenderedOut> processGenderFullGeo(List<PersonalNameGeoIn> names) throws ApiException, IOException {
Map<String, PersonalNameGenderedOut> result = new HashMap();
BatchPersonalNameGeoIn body = new BatchPersonalNameGeoIn();
Expand Down Expand Up @@ -722,6 +747,17 @@ private Map<String, FirstLastNameUSRaceEthnicityOut> processUSRaceEthnicity(List
return result;
}

/**
 * Sends one batch of first name / last name / phone number inputs to the
 * phone code service and indexes the results by id.
 *
 * @param names inputs to submit as a single batch
 * @return the results returned by the service, keyed by each result's id
 * @throws ApiException if the phoneCodeBatch call fails
 */
private Map<String, FirstLastNamePhoneCodedOut> processPhoneCode(ArrayList<FirstLastNamePhoneNumberIn> names) throws ApiException {
    Map<String, FirstLastNamePhoneCodedOut> result = new HashMap<>();
    BatchFirstLastNamePhoneNumberIn body = new BatchFirstLastNamePhoneNumberIn();
    body.setPersonalNamesWithPhoneNumbers(names);
    BatchFirstLastNamePhoneCodedOut phoneCoded = socialApi.phoneCodeBatch(body);
    for (FirstLastNamePhoneCodedOut personalName : phoneCoded.getPersonalNamesWithPhoneNumbers()) {
        result.put(personalName.getId(), personalName);
    }
    return result;
}

private void processData(String service, String[] outputHeaders, Writer writer, boolean flushBuffers, String softwareNameAndVersion) throws ApiException, IOException {
if (flushBuffers && !firstLastNamesIn.isEmpty() || firstLastNamesIn.size() >= BATCH_SIZE) {
if (service.equals(SERVICE_NAME_ORIGIN)) {
Expand Down Expand Up @@ -775,6 +811,13 @@ private void processData(String service, String[] outputHeaders, Writer writer,
}
personalNamesGeoIn.clear();
}
if (flushBuffers && !firstLastNamesPhoneNumberIn.isEmpty() || firstLastNamesPhoneNumberIn.size() >= BATCH_SIZE) {
if (service.equals(SERVICE_NAME_PHONECODE)) {
Map<String, FirstLastNamePhoneCodedOut> phoneCodes = processPhoneCode(new ArrayList(firstLastNamesPhoneNumberIn.values()));
append(writer, outputHeaders, firstLastNamesPhoneNumberIn, phoneCodes, softwareNameAndVersion);
}
firstLastNamesPhoneNumberIn.clear();
}
}

private void append(Writer writer, String[] outputHeaders, Map input, Map output, String softwareNameAndVersion) throws IOException {
Expand All @@ -797,6 +840,10 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output
} else if (inputObj instanceof PersonalNameGeoIn) {
PersonalNameGeoIn personalNameGeoIn = (PersonalNameGeoIn) inputObj;
writer.append(digest(personalNameGeoIn.getName()) + separatorOut + personalNameGeoIn.getCountryIso2() + separatorOut);
} else if (inputObj instanceof FirstLastNamePhoneNumberIn) {
FirstLastNamePhoneNumberIn firstLastNamePhoneNumberIn = (FirstLastNamePhoneNumberIn) inputObj;
writer.append(digest(firstLastNamePhoneNumberIn.getFirstName()) + separatorOut + digest(firstLastNamePhoneNumberIn.getLastName())+ separatorOut+ digest(firstLastNamePhoneNumberIn.getPhoneNumber())+ separatorOut);
//
} else {
throw new IllegalArgumentException("Serialization of " + inputObj.getClass().getName() + " not supported");
}
Expand Down Expand Up @@ -827,14 +874,28 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output
} else if (outputObj instanceof PersonalNameGeoOut) {
PersonalNameGeoOut personalNameGeoOut = (PersonalNameGeoOut) outputObj;
String scriptName = NamSorTools.computeScriptFirst(personalNameGeoOut.getName());
writer.append(personalNameGeoOut.getCountry() + separatorOut + personalNameGeoOut.getCountryAlt() + separatorOut + personalNameGeoOut.getProbabilityCalibrated() + separatorOut + personalNameGeoOut.getProbabilityAltCalibrated() + separatorOut +personalNameGeoOut.getScore() + separatorOut + scriptName + separatorOut);
writer.append(personalNameGeoOut.getCountry() + separatorOut + personalNameGeoOut.getCountryAlt() + separatorOut + personalNameGeoOut.getProbabilityCalibrated() + separatorOut + personalNameGeoOut.getProbabilityAltCalibrated() + separatorOut + personalNameGeoOut.getScore() + separatorOut + scriptName + separatorOut);
} else if (outputObj instanceof PersonalNameParsedOut) {
PersonalNameParsedOut personalNameParsedOut = (PersonalNameParsedOut) outputObj;
// {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore"};
String firstNameParsed = (personalNameParsedOut.getFirstLastName() != null ? personalNameParsedOut.getFirstLastName().getFirstName() : "");
String lastNameParsed = (personalNameParsedOut.getFirstLastName() != null ? personalNameParsedOut.getFirstLastName().getLastName() : "");
String scriptName = NamSorTools.computeScriptFirst(personalNameParsedOut.getName());
writer.append(firstNameParsed + separatorOut + lastNameParsed + separatorOut + personalNameParsedOut.getNameParserType() + separatorOut + personalNameParsedOut.getNameParserTypeAlt() + separatorOut + personalNameParsedOut.getScore() + separatorOut + scriptName + separatorOut);
} else if (outputObj instanceof FirstLastNamePhoneCodedOut) {
FirstLastNamePhoneCodedOut firstLastNamePhoneCodedOut = (FirstLastNamePhoneCodedOut) outputObj;
String scriptName = NamSorTools.computeScriptFirst(firstLastNamePhoneCodedOut.getLastName());
writer.append(firstLastNamePhoneCodedOut.getInternationalPhoneNumberVerified() + separatorOut
+ firstLastNamePhoneCodedOut.getPhoneCountryIso2Verified() + separatorOut
+ firstLastNamePhoneCodedOut.getPhoneCountryCode() + separatorOut
+ firstLastNamePhoneCodedOut.getPhoneCountryCodeAlt() + separatorOut
+ firstLastNamePhoneCodedOut.getPhoneCountryIso2() + separatorOut
+ firstLastNamePhoneCodedOut.getPhoneCountryIso2Alt() + separatorOut
+ firstLastNamePhoneCodedOut.getOriginCountryIso2() + separatorOut
+ firstLastNamePhoneCodedOut.getOriginCountryIso2Alt() + separatorOut
+ firstLastNamePhoneCodedOut.getVerified() + separatorOut
+ firstLastNamePhoneCodedOut.getScore() + separatorOut
+ scriptName);
} else {
throw new IllegalArgumentException("Serialization of " + outputObj.getClass().getName() + " not supported");
}
Expand All @@ -857,6 +918,7 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output
// Pending inputs keyed by uid, one buffer per input type. processData sends a
// buffer once it holds BATCH_SIZE entries (or on a final flush) and then clears it.
private final Map<String, FirstLastNameIn> firstLastNamesIn = new HashMap<>();
private final Map<String, PersonalNameIn> personalNamesIn = new HashMap<>();
private final Map<String, PersonalNameGeoIn> personalNamesGeoIn = new HashMap<>();
private final Map<String, FirstLastNamePhoneNumberIn> firstLastNamesPhoneNumberIn = new HashMap<>();

/**
* @return the withUID
Expand Down

0 comments on commit 9d3d280

Please sign in to comment.