Skip to content

Commit

Permalink
fix: limit number of chars in chargement brut
Browse files Browse the repository at this point in the history
  • Loading branch information
Nolife999 committed Jan 23, 2024
1 parent 241a074 commit 9a917ea
Show file tree
Hide file tree
Showing 3 changed files with 609 additions and 25 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package fr.insee.arc.core.service.p2chargement.operation;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
Expand All @@ -18,6 +17,7 @@
import fr.insee.arc.utils.dao.UtilitaireDao;
import fr.insee.arc.utils.exception.ArcException;
import fr.insee.arc.utils.exception.ArcExceptionMessage;
import fr.insee.arc.utils.files.BoundedBufferedReader;
import fr.insee.arc.utils.utils.FormatSQL;

/**
Expand All @@ -27,12 +27,8 @@
*/
public class ChargementBrut {


/** Combien de boucle au maximum */
private static final int LIMIT_BOUCLE = 1;
/** Combien de ligne on charge pour chacune des boucles */
private static final int LIMIT_CHARGEMENT_BRUTAL_NB_LIGNE = 50;
private static final int LIMIT_CHARGEMENT_BRUTAL_NB_CHAR = 10000;
protected int maxNumberOfLinesToRead = 50;
protected int maxNumberOfCharacterByLineToRead = 10000;

private static final Logger LOGGER = LogManager.getLogger(ChargementBrut.class);
private Connection connexion;
Expand All @@ -43,24 +39,23 @@ public class ChargementBrut {
* @param br reader ouvert sur le fichier
* @param nbBoucle étape dans la lecture
* */
private String requeteFichierBrutalement(String idSource, BufferedReader br, int nbBoucle) throws ArcException {
protected String requeteFichierBrutalement(String idSource, BoundedBufferedReader br) throws ArcException {
StaticLoggerDispatcher.info(LOGGER, "** chargerFichierBrutalement **");


StringBuilder requete=new StringBuilder();
int idLigne = nbBoucle * LIMIT_CHARGEMENT_BRUTAL_NB_LIGNE;
int idLigne = 0;
String line;
try {
line = br.readLine();
line = line.substring(0, Math.min(line.length(), LIMIT_CHARGEMENT_BRUTAL_NB_CHAR));
line = br.readLine(maxNumberOfCharacterByLineToRead);
} catch (IOException e) {
throw new ArcException(e, ArcExceptionMessage.FILE_READ_FAILED, idSource);
}
if (line == null) {
throw new ArcException(ArcExceptionMessage.FILE_IS_EMPTY, idSource);
}

boolean start=true;
while (line != null && idLigne < (nbBoucle + 1) * LIMIT_CHARGEMENT_BRUTAL_NB_LIGNE) {
while (line != null && idLigne < maxNumberOfLinesToRead) {
if (start)
{
requete.append("\nSELECT "+FormatSQL.quoteText(idSource)+"::text as "+ColumnEnum.ID_SOURCE.getColumnName()+","+ idLigne +"::int as id_ligne,"+FormatSQL.quoteText(line)+"::text as ligne");
Expand All @@ -72,9 +67,9 @@ private String requeteFichierBrutalement(String idSource, BufferedReader br, int
}

idLigne++;
if (idLigne < (nbBoucle + 1) * LIMIT_CHARGEMENT_BRUTAL_NB_LIGNE) {
if (idLigne < maxNumberOfLinesToRead) {
try {
line = br.readLine();
line = br.readLine(maxNumberOfCharacterByLineToRead);
} catch (IOException e) {
throw new ArcException(e, ArcExceptionMessage.FILE_READ_FAILED, idSource);
}
Expand All @@ -91,22 +86,14 @@ public void calculeNormeAndValiditeFichiers(InputStream file, FileIdCard normeOk
throws ArcException {
StaticLoggerDispatcher.info(LOGGER, "** calculeNormeFichiers **");

int nbBoucle = 0;

try(InputStreamReader isr = new InputStreamReader(file);
BufferedReader br = new BufferedReader(isr);) {
BoundedBufferedReader br = new BoundedBufferedReader(isr);) {

// On boucle tant que l'on a pas une norme ou une exception
// - soit la norme est trouvée et on sort
// - soit aucune/trop de normes est/sont trouvé(s) et on sort de calculerNormeAndValidite avec une exception
// nbBoucle<LIMIT_BOUCLE n'entre jamais en jeu.
// Gênant si la norme utilise une ligne qui n'est pas dans les xxx premières lignes, mais choix temporaire pour éviter
// de charger un fichier entier à la recherche de sa norme
while (normeOk.getIdNorme() == null && nbBoucle<LIMIT_BOUCLE) {
calculerNormeAndValidite(normeOk, requeteFichierBrutalement(normeOk.getIdSource(), br, nbBoucle));

nbBoucle++;
}
calculerNormeAndValidite(normeOk, requeteFichierBrutalement(normeOk.getIdSource(), br));

} catch (IOException e) {
throw new ArcException(e, ArcExceptionMessage.FILE_READ_FAILED, normeOk.getIdSource());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package fr.insee.arc.core.service.p2chargement.operation;

import static org.junit.Assert.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import fr.insee.arc.utils.exception.ArcException;
import fr.insee.arc.utils.files.BoundedBufferedReader;
import fr.insee.arc.utils.files.FileUtilsArc;
import fr.insee.arc.utils.utils.ManipString;

/**
 * Tests for the bounded line reading used by {@link ChargementBrut}.
 *
 * <p>The test class extends {@link ChargementBrut} so that it can reach the
 * protected {@code requeteFichierBrutalement} method and override the protected
 * limits {@code maxNumberOfLinesToRead} and
 * {@code maxNumberOfCharacterByLineToRead}.
 */
public class ChargementBrutTest extends ChargementBrut {

    /** Classpath location of the sample file read by both tests. */
    private static final String TEST_RESOURCE = "testFiles/siera_ano.xml";

    @Rule
    public TemporaryFolder testFolder = new TemporaryFolder();

    /**
     * Reads the first three lines of the sample file with a plain
     * {@link BufferedReader}, then checks that
     * {@code BoundedBufferedReader.readLine(int)} returns each of those lines cut
     * to the requested number of characters, and that a truncated read is
     * followed by the start of the next line.
     *
     * <p>NOTE(review): assumes the sample file has at least three lines, with
     * line 1 at least 5 characters long and line 3 at least 10 characters long;
     * otherwise {@code substring} throws before the assertion is evaluated.
     */
    @Test
    public void boundedBufferedReaderTestLimit() throws IOException, ArcException {
        File root = testFolder.newFolder("root");
        File fileTest = copyTestResourceInto(root);

        String line1;
        String line2;
        String line3;

        // Reference values: the untruncated lines as seen by a standard reader.
        try (FileInputStream is = new FileInputStream(fileTest);
                InputStreamReader isr = new InputStreamReader(is);
                BufferedReader br = new BufferedReader(isr)) {
            line1 = br.readLine();
            line2 = br.readLine();
            line3 = br.readLine();
        }

        try (FileInputStream is = new FileInputStream(fileTest);
                InputStreamReader isr = new InputStreamReader(is);
                BoundedBufferedReader br = new BoundedBufferedReader(isr)) {
            assertEquals(line1.substring(0, 5), br.readLine(5));
            // After a truncated read, the next call is expected to return line 2 in full.
            assertEquals(line2, br.readLine(10000));
            assertEquals(line3.substring(0, 10), br.readLine(10));
        }

        FileUtilsArc.deleteDirectory(root);
    }

    /**
     * Builds the query with the limits lowered to 10 lines of 5 characters and
     * checks that the last line number found in the query is
     * {@code maxNumberOfLinesToRead - 1}.
     */
    @Test
    public void requeteFichierBrutalementTest() throws IOException, ArcException {
        File root = testFolder.newFolder("root");
        File fileTest = copyTestResourceInto(root);

        this.maxNumberOfLinesToRead = 10;
        this.maxNumberOfCharacterByLineToRead = 5;

        String query;

        try (FileInputStream is = new FileInputStream(fileTest);
                InputStreamReader isr = new InputStreamReader(is);
                BoundedBufferedReader br = new BoundedBufferedReader(isr)) {
            query = requeteFichierBrutalement("siera_ano", br);
        }

        // Extract the id of the last line from the query text.
        // The 5-character marker '<n4ds' implicitly checks that
        // maxNumberOfCharacterByLineToRead=5 was applied to that line.
        String afterLastSelect = ManipString.substringAfterLast(query, "UNION ALL SELECT 'siera_ano',");
        int id = Integer.parseInt(ManipString.substringBeforeLast(afterLastSelect, ",'<n4ds'"));

        assertEquals(maxNumberOfLinesToRead - 1, id);
    }

    /**
     * Copies the sample classpath resource to a file named {@code test} inside
     * the given directory.
     *
     * <p>The resource stream is opened in a try-with-resources block because
     * {@code Files.copy} does not close the stream it reads from.
     *
     * @param directory existing directory that receives the copy
     * @return the file that was created
     * @throws IOException if the copy fails
     */
    private File copyTestResourceInto(File directory) throws IOException {
        File target = new File(directory.getAbsolutePath(), "test");
        try (InputStream resource = getClass().getClassLoader().getResourceAsStream(TEST_RESOURCE)) {
            // getResourceAsStream returns null when the resource is absent;
            // fail with an explicit message instead of a NullPointerException.
            assertNotNull("Test resource not found on classpath: " + TEST_RESOURCE, resource);
            Files.copy(resource, target.toPath());
        }
        return target;
    }

}
Loading

0 comments on commit 9a917ea

Please sign in to comment.