Skip to content

Commit

Permalink
Optimize performance and memory-usage by setting the buffer-size of "BufferedInputStream", "BufferedOutputStream" and "BufferedReader" to the "contentSize" of the resource, whenever it is available AND lower than the upper threshold (currently 5MB).
Browse files Browse the repository at this point in the history
  • Loading branch information
LSmyrnaios committed Mar 6, 2024
1 parent 183fb4d commit abad66e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -364,11 +364,12 @@ else if ( docFileNameType.equals(DocFileNameType.originalName) )
}
long bytesCount = 0;
FileOutputStream fileOutputStream = docFileData.getFileOutputStream();
int bufferSize = (((contentSize != -2) && contentSize < fiveMb) ? contentSize : fiveMb);

try ( BufferedInputStream inStream = new BufferedInputStream(inputStream, fiveMb);
BufferedOutputStream outStream = new BufferedOutputStream(((fileOutputStream != null) ? fileOutputStream : new FileOutputStream(docFile)), fiveMb) )
try ( BufferedInputStream inStream = new BufferedInputStream(inputStream, bufferSize);
BufferedOutputStream outStream = new BufferedOutputStream(((fileOutputStream != null) ? fileOutputStream : new FileOutputStream(docFile)), bufferSize) )
{
int maxStoringWaitingTime = getMaxStoringWaitingTime(contentSize);
int maxStoringWaitingTime = getMaxStoringWaitingTime(contentSize); // It handles the "-2" case.
int readByte = -1;
long startTime = System.nanoTime();
while ( (readByte = inStream.read()) != -1 )
Expand Down Expand Up @@ -469,11 +470,12 @@ public static synchronized DocFileData storeDocFileWithNumberName(HttpURLConnect
throw new RuntimeException("MD5 HASH ALGO MISSING");
}
long bytesCount = 0;
int bufferSize = (((contentSize != -2) && contentSize < fiveMb) ? contentSize : fiveMb);

try ( BufferedInputStream inStream = new BufferedInputStream(inputStream, fiveMb);
BufferedOutputStream outStream = new BufferedOutputStream(new FileOutputStream(docFile), fiveMb))
try ( BufferedInputStream inStream = new BufferedInputStream(inputStream, bufferSize);
BufferedOutputStream outStream = new BufferedOutputStream(new FileOutputStream(docFile), bufferSize))
{
int maxStoringWaitingTime = getMaxStoringWaitingTime(contentSize);
int maxStoringWaitingTime = getMaxStoringWaitingTime(contentSize); // It handles the "-2" case.
int readByte = -1;
long startTime = System.nanoTime();
while ( (readByte = inStream.read()) != -1 )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ public static DocFileData downloadAndStoreDocFile(HttpURLConnection conn, String
int contentSize = 0;
if ( (contentSize = getContentSize(conn, true, false)) == -1 ) // "Unacceptable size"-code..
throw new DocFileNotRetrievedException("The HTTP-reported size of this file was unacceptable!");
// It may be "-2", in case it was not retrieved..

// Write the downloaded bytes to the docFile and return the docFileName.
DocFileData docFileData = null;
Expand Down Expand Up @@ -830,27 +831,31 @@ public static List<String> blockSharedSiteSessionDomains(String targetUrl, Strin

public static String getHtmlString(HttpURLConnection conn, BufferedReader bufferedReader, boolean isForError)
{
if ( getContentSize(conn, false, isForError) == -1 ) { // "Unacceptable size"-code..
int contentSize = 0;
if ( (contentSize = getContentSize(conn, false, isForError)) == -1 ) { // "Unacceptable size"-code..
if ( !isForError ) // It's expected to have ZERO-length most times, and thus the extraction cannot continue. Do not show a message. It's rare that we get an error-message anyway.
logger.warn("Aborting HTML-extraction for pageUrl: " + conn.getURL().toString());
ConnSupportUtils.closeBufferedReader(bufferedReader); // This page's content-type was auto-detected, and the process fails before re-requesting the conn-inputStream, then make sure we close the last one.
return null;
}
// It may be "-2" in case the "contentSize" was not available.

StringBuilder htmlStrB = htmlStrBuilder.get();
if ( htmlStrB == null ) {
htmlStrB = new StringBuilder(100000); // Initialize and pre-allocate the StringBuilder.
htmlStrBuilder.set(htmlStrB); // Save it for future use by this thread.
}

int bufferSize = 0;
InputStream inputStream = null;
if ( bufferedReader == null ) {
inputStream = checkEncodingAndGetInputStream(conn, isForError);
if ( inputStream == null ) // The error is already logged inside.
return null;
bufferSize = (((contentSize != -2) && contentSize < FileUtils.fiveMb) ? contentSize : FileUtils.fiveMb);
}

try (BufferedReader br = ((bufferedReader != null) ? bufferedReader : new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), FileUtils.fiveMb)) ) // Try-with-resources
try (BufferedReader br = ((bufferedReader != null) ? bufferedReader : new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), bufferSize)) ) // Try-with-resources
{
String inputLine;
while ( (inputLine = br.readLine()) != null )
Expand Down Expand Up @@ -963,18 +968,22 @@ public static ArrayList<Object> detectContentTypeFromResponseBody(String finalUr
*/
public static DetectedContentType extractContentTypeFromResponseBody(HttpURLConnection conn)
{
if ( getContentSize(conn, false, false) == -1 ) { // "Unacceptable size"-code..
logger.warn("Aborting HTML-extraction for pageUrl: " + conn.getURL().toString());
int contentSize = 0;
if ( (contentSize = getContentSize(conn, false, false)) == -1) { // "Unacceptable size"-code..
logger.warn("Aborting content-extraction for pageUrl: " + conn.getURL().toString());
return null;
}
// It may be "-2" in case the "contentSize" was not available.

InputStream inputStream = checkEncodingAndGetInputStream(conn, false);
if ( inputStream == null )
return null;

int bufferSize = (((contentSize != -2) && contentSize < FileUtils.fiveMb) ? contentSize : FileUtils.fiveMb);

BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), FileUtils.fiveMb);
br = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), bufferSize);
String inputLine;

// Skip empty lines in the beginning of the HTML-code
Expand Down Expand Up @@ -1172,8 +1181,15 @@ public static InputStream getInputStreamFromInputDataUrl()
if ( inputStream == null )
throw new RuntimeException("Could not acquire the InputStream!");

// Check if we should abort the download based on its content-size.
int contentSize = 0;
if ( (contentSize = getContentSize(conn, true, false)) == -1 ) // "Unacceptable size"-code..
throw new DocFileNotRetrievedException("The HTTP-reported size of this file was unacceptable!");
// It may be "-2", in case it was not retrieved..
int bufferSize = (((contentSize != -2) && contentSize < FileUtils.fiveMb) ? contentSize : FileUtils.fiveMb);

// Wrap it with a buffer, for increased efficiency.
inputStream = new BufferedInputStream(inputStream, FileUtils.fiveMb);
inputStream = new BufferedInputStream(inputStream, bufferSize);

} catch (Exception e) {
String errorMessage = "Unexpected error when retrieving the input-stream from the inputDataUrl:\n" + e.getMessage();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public void testCustomInputOutputWithOriginalDocFileNames()
args[3] = "-docFileNameType";
args[4] = "originalName";
args[5] = "-docFilesStorage";
args[6] = "/storage/runs/run1/docFiles";
args[6] = "docFiles";
main(args);
}

Expand Down

0 comments on commit abad66e

Please sign in to comment.