Skip to content

Commit

Permalink
Merge pull request DSpace#9165 from TexasDigitalLibrary/DS-9059
Browse files Browse the repository at this point in the history
DS-9059: removes options to ping search engines when generating sitemaps
  • Loading branch information
alanorth authored Nov 6, 2023
2 parents 440f03c + f8f8806 commit 60d4bca
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 119 deletions.
109 changes: 3 additions & 106 deletions dspace-api/src/main/java/org/dspace/app/sitemap/GenerateSitemaps.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,8 @@
*/
package org.dspace.app.sitemap;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
Expand All @@ -29,7 +22,6 @@
import org.apache.commons.cli.ParseException;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.dspace.content.Collection;
Expand Down Expand Up @@ -87,11 +79,6 @@ public static void main(String[] args) throws Exception {
"do not generate sitemaps.org protocol sitemap");
options.addOption("b", "no_htmlmap", false,
"do not generate a basic HTML sitemap");
options.addOption("a", "ping_all", false,
"ping configured search engines");
options
.addOption("p", "ping", true,
"ping specified search engine URL");
options
.addOption("d", "delete", false,
"delete sitemaps dir and its contents");
Expand All @@ -116,14 +103,13 @@ public static void main(String[] args) throws Exception {
}

/*
* Sanity check -- if no sitemap generation or pinging to do, or deletion, print usage
* Sanity check -- if no sitemap generation or deletion, print usage
*/
if (line.getArgs().length != 0 || line.hasOption('d') || line.hasOption('b')
&& line.hasOption('s') && !line.hasOption('g')
&& !line.hasOption('m') && !line.hasOption('y')
&& !line.hasOption('p')) {
&& !line.hasOption('m') && !line.hasOption('y')) {
System.err
.println("Nothing to do (no sitemap to generate, no search engines to ping)");
.println("Nothing to do (no sitemap to generate)");
hf.printHelp(usage, options);
System.exit(1);
}
Expand All @@ -137,20 +123,6 @@ public static void main(String[] args) throws Exception {
deleteSitemaps();
}

if (line.hasOption('a')) {
pingConfiguredSearchEngines();
}

if (line.hasOption('p')) {
try {
pingSearchEngine(line.getOptionValue('p'));
} catch (MalformedURLException me) {
System.err
.println("Bad search engine URL (include all except sitemap URL)");
System.exit(1);
}
}

System.exit(0);
}

Expand Down Expand Up @@ -303,79 +275,4 @@ public static void generateSitemaps(boolean makeHTMLMap, boolean makeSitemapOrg)

c.abort();
}

/**
 * Notify every search engine listed under the {@code sitemap.engineurls}
 * configuration property by calling {@link #pingSearchEngine(String)} once
 * per entry. A warning is logged (and nothing else happens) when the
 * property yields no entries; an entry that is not a valid URL is logged
 * and skipped so the remaining entries are still processed.
 *
 * @throws UnsupportedEncodingException theoretically should never happen
 */
public static void pingConfiguredSearchEngines()
    throws UnsupportedEncodingException {
    final String[] configuredEngines =
        configurationService.getArrayProperty("sitemap.engineurls");

    // Nothing configured: report it and bail out early.
    if (configuredEngines == null || configuredEngines.length == 0) {
        log.warn("No search engine URLs configured to ping");
        return;
    }

    for (String engine : configuredEngines) {
        try {
            pingSearchEngine(engine);
        } catch (MalformedURLException badUrl) {
            // One bad entry must not prevent the others from being pinged.
            log.warn("Bad search engine URL in configuration: " + engine);
        }
    }
}

/**
 * Ping the given search engine with this site's sitemap URL.
 * <p>
 * The sitemap URL ({@code dspace.ui.url} + {@code /sitemap}) is URL-encoded
 * and appended to {@code engineURL}; the result is requested over HTTP and
 * the outcome is logged. I/O failures while contacting the search engine
 * are logged as warnings and are not propagated to the caller.
 * <p>
 * When both {@code http.proxy.host} and {@code http.proxy.port} are
 * configured, they are published as JVM-wide system properties before the
 * request is made.
 *
 * @param engineURL Search engine ping URL, including the protocol and
 *                  everything except the sitemap URL itself, e.g.
 *                  {@code http://www.google.com/webmasters/sitemaps/ping?sitemap=}
 * @throws MalformedURLException        if the resulting URL is malformed
 * @throws UnsupportedEncodingException theoretically should never happen
 */
public static void pingSearchEngine(String engineURL)
    throws MalformedURLException, UnsupportedEncodingException {
    // Set up HTTP proxy
    String proxyHost = configurationService.getProperty("http.proxy.host");
    String proxyPort = configurationService.getProperty("http.proxy.port");
    if (StringUtils.isNotBlank(proxyHost) && StringUtils.isNotBlank(proxyPort)) {
        System.setProperty("proxySet", "true");
        System.setProperty("proxyHost", proxyHost);
        // Was System.getProperty(...), which only read the value and left
        // the proxy port unset.
        System.setProperty("proxyPort", proxyPort);
    }

    String sitemapURL = configurationService.getProperty("dspace.ui.url")
        + "/sitemap";

    URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));

    try {
        HttpURLConnection connection = (HttpURLConnection) url
            .openConnection();

        // Collect the response body so it can be included in the log on failure.
        StringBuilder resp = new StringBuilder();
        // try-with-resources closes the reader even if readLine() throws.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
            connection.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                resp.append(inputLine).append("\n");
            }
        }

        if (connection.getResponseCode() == 200) {
            log.info("Pinged " + url.toString() + " successfully");
        } else {
            log.warn("Error response pinging " + url.toString() + ":\n"
                         + resp);
        }
    } catch (IOException e) {
        log.warn("Error pinging " + url.toString(), e);
    }
}
}
13 changes: 0 additions & 13 deletions dspace/config/dspace.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1403,19 +1403,6 @@ sitemap.dir = ${dspace.dir}/sitemaps
# Defaults to "sitemaps", which means they are available at ${dspace.server.url}/sitemaps/
# sitemap.path = sitemaps

#
# Comma-separated list of search engine URLs to 'ping' when a new Sitemap has
# been created. Include everything except the Sitemap URL itself (which will
# be URL-encoded and appended to form the actual URL 'pinged').
#
sitemap.engineurls = http://www.google.com/webmasters/sitemaps/ping?sitemap=

# Add this to the above parameter if you have an application ID with Yahoo
# (Replace REPLACE_ME with your application ID)
# http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=REPLACE_ME&url=
#
# No known Sitemap 'ping' URL for MSN/Live search

# Define cron for how frequently the sitemap should refresh.
# Defaults to running daily at 1:15am
# Cron syntax is defined at https://www.quartz-scheduler.org/api/2.3.0/org/quartz/CronTrigger.html
Expand Down

0 comments on commit 60d4bca

Please sign in to comment.