Skip to content

Commit

Permalink
Merge pull request #179 from socrata/rjm/nbe-stuff
Browse files Browse the repository at this point in the history
Rjm/nbe stuff
  • Loading branch information
rjmac authored Apr 4, 2018
2 parents af390f1 + 81e645e commit 58423a1
Show file tree
Hide file tree
Showing 70 changed files with 3,859 additions and 2,441 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ common/
_layouts/
public/
log.txt
/api-key.txt
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ This repository is our development basecamp. If you find a bug or have questions
### Apache Maven
DataSync uses Maven for building and package management. For more information: [What is Maven?](http://maven.apache.org/what-is-maven.html)

To build the project run:
To build the project, first create an application token on your profile page. Put the random string it produces in a file called "api-key.txt" in the root directory of this project, then run:
```
mvn clean install
```
Expand All @@ -57,5 +57,6 @@ java -jar DataSync-1.8.2-jar-with-dependencies.jar

### Java SDK

DataSync can be used as a Java SDK, for detailed documentation refer to:
DataSync can be used as a Java SDK. For detailed documentation, refer
to:
[http://socrata.github.io/datasync/guides/datasync-library-sdk.html](http://socrata.github.io/datasync/guides/datasync-library-sdk.html)
40 changes: 32 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>DataSync</groupId>
<artifactId>DataSync</artifactId>
<version>1.8.2</version>
<version>1.9.0</version>
<developers>
<developer>
<name>Ayn Leslie-Cook</name>
Expand Down Expand Up @@ -60,11 +60,25 @@
<resource>
<directory>images/</directory>
</resource>
<resource>
<directory>.</directory>
<filtering>true</filtering>
<includes>
<include>api-key.txt</include>
</includes>
</resource>
</resources>
<testResources>
<testResource>
<directory>src/test/java</directory>
</testResource>
<testResource>
<directory>.</directory>
<filtering>true</filtering>
<includes>
<include>api-key.txt</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
Expand Down Expand Up @@ -97,7 +111,7 @@
<dependency>
<groupId>com.socrata</groupId>
<artifactId>soda-api-java</artifactId>
<version>0.9.12</version>
<version>0.10.1</version>
</dependency>
<dependency>
<groupId>com.socrata</groupId>
Expand All @@ -110,14 +124,19 @@
<version>1.4.7</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.9.13</version>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.8.6</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.8.6</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.8.6</version>
</dependency>
<dependency>
<groupId>org.tukaani</groupId>
Expand Down Expand Up @@ -180,5 +199,10 @@
<artifactId>javac2</artifactId>
<version>7.0.3</version>
</dependency>
<dependency>
<groupId>info.debatty</groupId>
<artifactId>java-string-similarity</artifactId>
<version>1.1.0</version>
</dependency>
</dependencies>
</project>
88 changes: 64 additions & 24 deletions src/main/java/com/socrata/datasync/DatasetUtils.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package com.socrata.datasync;

import au.com.bytecode.opencsv.CSVReader;
import com.socrata.datasync.config.userpreferences.UserPreferences;
import com.socrata.model.importer.Column;
import com.socrata.model.importer.Dataset;
import com.socrata.model.importer.GeoDataset;
import com.socrata.model.importer.DatasetInfo;
import org.apache.http.HttpException;
import org.apache.http.HttpStatus;
Expand All @@ -14,42 +16,50 @@
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.utils.URIBuilder;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import com.fasterxml.jackson.databind.DeserializationConfig;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class DatasetUtils {
private static final String LOCATION_DATATYPE_NAME = "location";

/**
 * Response handler that turns an HTTP response into a {@code DatasetInfo}.
 *
 * A status code in the 2xx range is treated as success and the entity body is
 * deserialized with the shared {@code mapper}; any other status is reported as a
 * {@code ClientProtocolException} carrying the status line text.
 */
private static class DatasetInfoResponseHandler implements ResponseHandler<DatasetInfo> {
    /**
     * @param response the HTTP response to interpret
     * @return the deserialized {@code DatasetInfo}, or {@code null} when a successful
     *         response carries no entity
     * @throws ClientProtocolException if the status code is outside 200-299
     * @throws IOException if reading or parsing the entity content fails
     */
    @Override
    public DatasetInfo handleResponse(final HttpResponse response)
            throws ClientProtocolException, IOException {
        final StatusLine line = response.getStatusLine();
        final int code = line.getStatusCode();

        // Anything outside 2xx is an error; surface the raw status line to the caller.
        if (code < 200 || code >= 300) {
            throw new ClientProtocolException(line.toString());
        }

        final HttpEntity body = response.getEntity();
        if (body == null) {
            return null;
        }
        return mapper.readValue(body.getContent(), DatasetInfo.class);
    }
}
private static ObjectMapper mapper = new ObjectMapper().enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY);

/**
 * Fetches the metadata of a view as a {@code Dataset} and removes its system and
 * computed columns before handing it back.
 *
 * @param userPrefs preferences used to reach the domain
 * @param viewId    identifier of the view to look up
 * @return the dataset metadata after system/computed columns have been removed
 */
public static Dataset getDatasetInfo(UserPreferences userPrefs, String viewId) throws URISyntaxException, IOException, HttpException {
    final Dataset dataset = getDatasetInfoReflective(userPrefs, viewId, Dataset.class);
    removeSystemAndComputedColumns(dataset);
    return dataset;
}

private static final String LOCATION_DATATYPE_NAME = "location";
/**
 * Fetches the metadata of a view, deserialized as a {@code GeoDataset}.
 * The result is returned as fetched; no columns are removed here.
 *
 * @param userPrefs preferences used to reach the domain
 * @param viewId    identifier of the view to look up
 * @return the metadata deserialized as a {@code GeoDataset}
 */
public static GeoDataset getGeoDatasetInfo(UserPreferences userPrefs, String viewId) throws URISyntaxException, IOException, HttpException {
    final GeoDataset geoDataset = getDatasetInfoReflective(userPrefs, viewId, GeoDataset.class);
    return geoDataset;
}

private static ObjectMapper mapper = new ObjectMapper().enable(DeserializationConfig.Feature.ACCEPT_SINGLE_VALUE_AS_ARRAY);
/**
 * Drops every column whose field name starts with ":" or that has a non-null
 * computation strategy, then stores the remaining list back on the dataset.
 *
 * @param ds the dataset whose column list is pruned
 */
private static void removeSystemAndComputedColumns(Dataset ds) {
    final List<Column> remaining = ds.getColumns();

    for (Iterator<Column> cursor = remaining.iterator(); cursor.hasNext(); ) {
        final Column candidate = cursor.next();

        // Keep only columns that are neither ":"-prefixed nor computed.
        final boolean keep = !candidate.getFieldName().startsWith(":")
                && candidate.getComputationStrategy() == null;
        if (keep) {
            continue;
        }
        cursor.remove();
    }

    ds.setColumns(remaining);
}

public static <T> T getDatasetInfo(UserPreferences userPrefs, String viewId, final Class<T> typ) throws URISyntaxException, IOException, HttpException {
private static <T> T getDatasetInfoReflective(UserPreferences userPrefs, String viewId, final Class<T> typ) throws URISyntaxException, IOException, HttpException {
String justDomain = getDomainWithoutScheme(userPrefs);
URI absolutePath = new URIBuilder()
.setScheme("https")
Expand Down Expand Up @@ -78,12 +88,12 @@ public T handleResponse(
return datasetInfo;
}

public static String getDatasetSample(UserPreferences userPrefs, String viewId, int rowsToSample) throws URISyntaxException, IOException, HttpException {
public static List<List<String>> getDatasetSample(UserPreferences userPrefs, Dataset dataset, int rowsToSample) throws URISyntaxException, IOException, HttpException {
String justDomain = getDomainWithoutScheme(userPrefs);
URI absolutePath = new URIBuilder()
.setScheme("https")
.setHost(justDomain)
.setPath("/resource/" + viewId + ".csv")
.setPath("/resource/" + dataset.getId() + ".csv")
.addParameter("$limit",""+rowsToSample)
.build();

Expand All @@ -105,7 +115,37 @@ public String handleResponse(
HttpUtility util = new HttpUtility(userPrefs, true);
String sample = util.get(absolutePath, "application/csv", handler);
util.close();
return sample;

CSVReader reader = new CSVReader(new StringReader(sample));

List<List<String>> results = new ArrayList<>();

Set<String> expectedFieldNames = new HashSet<String>();
for(Column c : dataset.getColumns()) {
expectedFieldNames.add(c.getFieldName());
}
String[] row = reader.readNext();
boolean[] keep = new boolean[row.length];
for(int i = 0; i != row.length; ++i) {
keep[i] = expectedFieldNames.contains(row[i]);
}
results.add(filter(keep, row));

while((row = reader.readNext()) != null) {
results.add(filter(keep, row));
}

return results;
}

/**
 * Selects the elements of {@code elems} whose position is flagged {@code true}
 * in {@code filter}, preserving their order.
 *
 * Only positions present in both arrays are considered: an element of
 * {@code elems} at an index at or beyond {@code filter.length} has no flag and
 * is therefore dropped instead of raising an
 * {@code ArrayIndexOutOfBoundsException}.
 *
 * @param filter per-position keep flags
 * @param elems  the values to select from
 * @return a new list holding the kept values
 */
private static List<String> filter(boolean[] filter, String[] elems) {
    // A ragged row may be longer (or shorter) than the flag array; never index past either one.
    final int limit = Math.min(filter.length, elems.length);
    final List<String> result = new ArrayList<>(limit);

    for (int i = 0; i < limit; ++i) {
        if (filter[i]) {
            result.add(elems[i]);
        }
    }

    return result;
}

public static String getDomainWithoutScheme(UserPreferences userPrefs){
Expand Down Expand Up @@ -140,7 +180,7 @@ public static String getRowIdentifierName(Dataset schema) {
* @return list of field names or null if there
*/
public static String getFieldNamesString(UserPreferences userPrefs, String datasetId) throws HttpException, IOException, URISyntaxException {
Dataset datasetInfo = getDatasetInfo(userPrefs, datasetId, Dataset.class);
Dataset datasetInfo = getDatasetInfo(userPrefs, datasetId);
return getFieldNamesString(datasetInfo);
}

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/socrata/datasync/DatasyncGithubRelease.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.socrata.datasync;

import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.annotate.JsonSerialize;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;

@JsonSerialize(include=JsonSerialize.Inclusion.NON_NULL)
@JsonIgnoreProperties(ignoreUnknown=true)
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/socrata/datasync/HttpUtility.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public HttpUtility(UserPreferences userPrefs, boolean useAuth, int maxRetries, d
HttpClientBuilder clientBuilder = HttpClients.custom();
if (useAuth) {
authHeader = getAuthHeader(userPrefs.getUsername(), userPrefs.getPassword());
appToken = userPrefs.getAPIKey();
appToken = userPrefs.getConnectionInfo().getToken();
}
authRequired = useAuth;
if(userPrefs != null) {
Expand Down
Loading

0 comments on commit 58423a1

Please sign in to comment.