Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issn loader improvements #7053

Merged
merged 28 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3279481
Deactivated records should get 409 on GET requests
amontenegro Feb 27, 2024
b3660c2
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Feb 28, 2024
2cc66ab
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 1, 2024
f2dc713
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 4, 2024
b4f8223
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 7, 2024
b0026c3
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 12, 2024
53e7616
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 19, 2024
bf82372
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Mar 25, 2024
23b6afb
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 4, 2024
182c67d
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 4, 2024
c52ef13
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 8, 2024
d4f779b
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 8, 2024
27b0033
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 17, 2024
585b896
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Apr 18, 2024
9c9dfef
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro May 8, 2024
42ac636
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro May 28, 2024
3f0d771
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jun 10, 2024
e65ec79
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jun 20, 2024
e69191a
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jun 24, 2024
cff8029
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jul 3, 2024
ddf3e67
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jul 4, 2024
a091bb3
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jul 5, 2024
97024f0
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jul 15, 2024
2f8657f
Add better error message for when an ISSN couldn't be found
amontenegro Jul 17, 2024
49c3dd8
Running locally
amontenegro Jul 17, 2024
e43ac75
Banned exception and more logging
amontenegro Jul 18, 2024
3201e7e
Merge branch 'main' of https://github.com/ORCID/ORCID-Source
amontenegro Jul 18, 2024
1280d31
Merge with latest main
amontenegro Jul 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.orcid.core.exception;

import java.util.Map;

/**
 * Exception signalling that the caller has been banned by a remote service.
 *
 * NOTE(review): in this change set it is raised by the ISSN client when the
 * portal response contains "you have been banned"; confirm there are no other
 * intended uses before broadening this description.
 */
public class BannedException extends ApplicationException {

private static final long serialVersionUID = 1L;

/**
 * Creates the exception without any parameters, delegating to the
 * no-argument constructor of ApplicationException.
 */
public BannedException() {
super();
}

/**
 * Creates the exception with a parameter map that is handed unchanged to
 * ApplicationException.
 *
 * @param params passed straight to the superclass constructor; presumably
 *               message parameters, verify against ApplicationException
 */
public BannedException(Map<String, String> params) {
super(params);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.eclipse.jetty.http.HttpStatus;
import org.orcid.core.exception.BannedException;
import org.orcid.core.exception.TooManyRequestsException;
import org.orcid.core.exception.UnexpectedResponseCodeException;
import org.orcid.core.utils.http.HttpRequestUtils;
Expand All @@ -36,19 +37,24 @@ public class IssnClient {
@Resource
private HttpRequestUtils httpRequestUtils;

public IssnData getIssnData(String issn) throws TooManyRequestsException, UnexpectedResponseCodeException, IOException, URISyntaxException, InterruptedException {
public IssnData getIssnData(String issn) throws BannedException, TooManyRequestsException, UnexpectedResponseCodeException, IOException, URISyntaxException, InterruptedException, JSONException {
if(StringUtils.isEmpty(issn)) {
return null;
}
LOG.debug("Extracting ISSN for " + issn);
String json = getJsonDataFromIssnPortal(issn.toUpperCase());
try {
if (json != null) {
IssnData data = extractIssnData(issn.toUpperCase(), json);
data.setIssn(issn);
return data;
} else {
IssnData data = extractIssnData(issn.toUpperCase(), json);
data.setIssn(issn);
return data;
} catch (JSONException e) {
LOG.warn("Error extracting issn data from json returned from issn portal "+ issn);
if(json == null) {
return null;
} else if(json.contains("you have been banned")) {
throw new BannedException();
} else {
throw e;
}
} catch (Exception e) {
LOG.warn("Error extracting issn data from json returned from issn portal "+ issn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import javax.annotation.Resource;

import org.codehaus.jettison.json.JSONException;
import org.orcid.core.exception.*;
import org.orcid.core.groupIds.issn.IssnClient;
import org.orcid.core.groupIds.issn.IssnData;
Expand Down Expand Up @@ -149,6 +150,9 @@ private GroupIdRecord createIssnGroupIdRecord(String groupId, String issn) {
} catch (InterruptedException e) {
LOG.warn("InterruptedException for issn {}", issn);
throw new InvalidIssnException();
} catch(JSONException e) {
LOG.warn("JSONException for issn {}", issn, e);
throw new InvalidIssnException();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import javax.annotation.Resource;

import org.codehaus.jettison.json.JSONException;
import org.orcid.core.exception.*;
import org.orcid.core.groupIds.issn.IssnClient;
import org.orcid.core.groupIds.issn.IssnData;
Expand Down Expand Up @@ -66,7 +67,7 @@ public GroupIdRecord createGroupIdRecord(GroupIdRecord groupIdRecord) {
}

@Override
public GroupIdRecord createOrcidSourceIssnGroupIdRecord(String groupId, String issn) {
public GroupIdRecord createOrcidSourceIssnGroupIdRecord(String groupId, String issn) throws TooManyRequestsException, BannedException {
GroupIdRecord issnRecord = createIssnGroupIdRecord(groupId, issn);
GroupIdRecordEntity entity = jpaJaxbGroupIdRecordAdapter.toGroupIdRecordEntity(issnRecord);
entity.setClientSourceId(orcidSourceClientDetailsId);
Expand Down Expand Up @@ -114,7 +115,7 @@ public void deleteGroupIdRecord(Long putCode) {
}
}

private GroupIdRecord createIssnGroupIdRecord(String groupId, String issn) {
private GroupIdRecord createIssnGroupIdRecord(String groupId, String issn) throws TooManyRequestsException, BannedException {
if (!issnValidator.issnValid(issn)) {
throw new InvalidIssnException();
}
Expand Down Expand Up @@ -148,6 +149,9 @@ private GroupIdRecord createIssnGroupIdRecord(String groupId, String issn) {
} catch (InterruptedException e) {
LOG.warn("InterruptedException for issn {}", issn);
throw new InvalidIssnException();
} catch(JSONException e) {
LOG.warn("JSONException for issn {}", issn);
throw new InvalidIssnException();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.orcid.scheduler.loader.source.issn;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.*;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
Expand All @@ -10,6 +10,8 @@
import javax.annotation.Resource;

import org.apache.commons.lang3.StringUtils;
import org.codehaus.jettison.json.JSONException;
import org.orcid.core.exception.BannedException;
import org.orcid.core.exception.TooManyRequestsException;
import org.orcid.core.exception.UnexpectedResponseCodeException;
import org.orcid.core.groupIds.issn.IssnClient;
Expand All @@ -19,6 +21,7 @@
import org.orcid.persistence.dao.GroupIdRecordDao;
import org.orcid.persistence.jpa.entities.ClientDetailsEntity;
import org.orcid.persistence.jpa.entities.GroupIdRecordEntity;
import org.orcid.utils.alerting.SlackManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
Expand All @@ -36,7 +39,10 @@ public class IssnLoadSource {

@Value("${org.orcid.scheduler.issnLoadSource.waitBetweenBatches:10000}")
private int waitBetweenBatches;


@Value("${org.orcid.scheduler.issnLoadSource.rateLimit.pause:600000}")
private int pause;

@Resource
private GroupIdRecordDao groupIdRecordDao;

Expand All @@ -53,6 +59,9 @@ public class IssnLoadSource {

@Resource
private IssnClient issnClient;

@Resource
private SlackManager slackManager;

public void loadIssn(String issnSource) {

Expand All @@ -74,6 +83,7 @@ private void updateIssnGroupIdRecords() {
List<GroupIdRecordEntity> issnEntities = groupIdRecordDaoReadOnly.getIssnRecordsSortedBySyncDate(batchSize, startTime);
int batchCount = 0;
int total = 0;

while (!issnEntities.isEmpty()) {
for (GroupIdRecordEntity issnEntity : issnEntities) {
LOG.info("Processing entity {}", new Object[]{ issnEntity.getId() });
Expand All @@ -89,9 +99,29 @@ private void updateIssnGroupIdRecords() {
new Object[]{issnEntity.getId(), issnEntity.getGroupId(), Integer.toString(total)});
}
} catch(TooManyRequestsException tmre) {
//TODO: We are being rate limited, we have to pause
//We are being rate limited, we have to pause for 'pause' milliseconds
LOG.warn("We are being rate limited by the issn portal");
recordFailure(issnEntity, "RATE_LIMIT reached");
if(pause() != 1) {
LOG.warn("Unable to pause, finishing the process");
return;
}
} catch(BannedException be) {
LOG.error("We have been banned from the issn portal, the sync process will finish now");
try {
InetAddress id = InetAddress.getLocalHost();
slackManager.sendSystemAlert("We have bee banned from the issn portal on " + id.getHostName());
} catch(UnknownHostException uhe) {
// Lets try to get the IP address
try(final DatagramSocket socket = new DatagramSocket()){
socket.connect(InetAddress.getByName("8.8.8.8"), 10002);
String ip = socket.getLocalAddress().getHostAddress();
slackManager.sendSystemAlert("We have bee banned from the issn portal on " + ip);
} catch(SocketException | UnknownHostException se) {
slackManager.sendSystemAlert("We have bee banned from the issn portal on - Couldn't identify the machine");
}
}
return;
} catch(UnexpectedResponseCodeException urce) {
LOG.warn("Unexpected response code {} for issn {}", urce.getReceivedCode(), issn);
recordFailure(issnEntity, "Unexpected response code " + urce.getReceivedCode());
Expand All @@ -104,6 +134,9 @@ private void updateIssnGroupIdRecords() {
} catch (InterruptedException e) {
LOG.warn("InterruptedException for issn {}", issn);
recordFailure(issnEntity, "InterruptedException");
} catch(JSONException e) {
LOG.warn("InterruptedException for issn {}", issn);
recordFailure(issnEntity, "InterruptedException");
}
} else {
LOG.info("Issn for group record {} not valid: {}", issnEntity.getId(), issnEntity.getGroupId());
Expand Down Expand Up @@ -164,4 +197,15 @@ private String getIssn(GroupIdRecordEntity issnEntity) {
return null;
}

/**
 * Blocks the current thread for the configured rate-limit pause.
 *
 * The duration is the value of the {@code pause} field, which is passed
 * directly to {@link Thread#sleep(long)} and is therefore in milliseconds.
 *
 * @return 1 when the sleep completed normally, -1 when the thread was
 *         interrupted while sleeping
 */
private int pause() {
    try {
        LOG.warn("Pause do to rate limit");
        Thread.sleep(pause);
        return 1;
    } catch (InterruptedException e) {
        LOG.warn("Unable to pause", e);
        // Catching InterruptedException clears the interrupt flag; restore it
        // so code further up the stack can still see the interruption.
        Thread.currentThread().interrupt();
        return -1;
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
*/
public class SlackManagerImpl implements SlackManager {

@Value("${org.orcid.core.slack.webhookUrl:}")
@Value("${org.orcid.core.slack.webhookUrl}")
private String webhookUrl;

@Value("${org.orcid.core.slack.channel}")
Expand Down
Loading