Skip to content

Commit

Permalink
Reworked the entire project to use slf4j with either log4j 1.2 or log…
Browse files Browse the repository at this point in the history
…4j2 backend (depending on hadoop).
  • Loading branch information
Asger Askov Blekinge committed Nov 30, 2020
1 parent 7e2c3e1 commit 9276f6a
Show file tree
Hide file tree
Showing 101 changed files with 455 additions and 576 deletions.
37 changes: 1 addition & 36 deletions digipres-tika/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,7 @@
</plugins>
</build>
<dependencies>
<!-- This module uses slf4j, and the log4j bridge -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.30</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
<version>1.7.30</version>
</dependency>

<dependency>
<!--slf4j simple for tests-->
<groupId>org.slf4j</groupId>
Expand All @@ -74,27 +64,6 @@
<scope>test</scope>
</dependency>

<!--These log dependencies are disabled (scope provided)-->
<!--This way, you do NOT need to add them as exclusions to all the dependencies below -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<version>2.13.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.13.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>com.typesafe</groupId>
<artifactId>config</artifactId>
Expand Down Expand Up @@ -208,10 +177,6 @@
<groupId>org.springframework</groupId>
<artifactId>spring-beans</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
import java.util.HashMap;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

Expand All @@ -50,7 +51,7 @@
*
*/
public class PreservationParser extends AutoDetectParser {
private static Logger log = Logger.getLogger(PreservationParser.class.getName());
private static Logger log = LoggerFactory.getLogger(PreservationParser.class.getName());

public static final String EXT_MIME_TYPE = "Extended-MIME-Type";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import java.io.InterruptedIOException;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.tika.detect.CompositeDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
Expand All @@ -42,6 +41,8 @@
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;


Expand All @@ -52,7 +53,7 @@
*/
public class TikaDeepIdentifier {

private static Logger log = Logger.getLogger(TikaDeepIdentifier.class.getName());
private static Logger log = LoggerFactory.getLogger(TikaDeepIdentifier.class.getName());

private static int MAX_BUF = 1024*1024;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;

import org.apache.log4j.Logger;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.IOException;
Expand All @@ -43,7 +44,7 @@
import java.util.HashMap;

public class HighlightJSDetector implements Detector {
private static Logger log = Logger.getLogger(HighlightJSDetector.class.getName());
private static Logger log = LoggerFactory.getLogger(HighlightJSDetector.class.getName());

/**
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@

import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.jempbox.xmp.XMPSchemaPDF;
import org.apache.log4j.Logger;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
Expand All @@ -71,6 +70,8 @@
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
Expand All @@ -87,7 +88,7 @@
*/
public class PDFParser extends AbstractParser {

private static Logger log = Logger.getLogger(PDFParser.class);
private static Logger log = LoggerFactory.getLogger(PDFParser.class);

/** Serial version UID */
private static final long serialVersionUID = -752276948656079347L;
Expand Down
34 changes: 34 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@
</build>

<repositories>
<repository>
<id>version99</id>
<!-- Highly available repository serving empty artifacts; presumably the source of the
     "99-empty" versions of commons-logging and log4j pinned in dependencyManagement. -->
<!-- NOTE(review): this is a plain http URL. Maven 3.8.1 and later block external http
     repositories by default; confirm whether an https endpoint is available. -->
<url>http://version99.qos.ch/</url>
</repository>

<repository>
<id>maven-restlet</id>
<name>Public online Restlet repository</name>
Expand Down Expand Up @@ -205,4 +211,32 @@
<url>https://repository.apache.org/snapshots/</url>
</repository>
</repositories>

<dependencies>
<!-- Logging: all code logs through the slf4j API. Declared at this level so that,
     presumably, every module inherits these two artifacts (confirm this pom is the parent). -->
<dependency>
<!-- The slf4j logging facade itself. -->
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version.override}</version>
</dependency>
<dependency>
<!-- Bridge that routes commons-logging (JCL) calls to slf4j; it stands in for the
     commons-logging jar, which is replaced by an empty artifact in dependencyManagement. -->
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>${slf4j.version.override}</version>
</dependency>
</dependencies>
<!-- Pins the versions of legacy logging artifacts wherever they are pulled in, so that
     individual dependencies do not each need an explicit exclusion. -->
<dependencyManagement>
<dependencies>
<dependency><!--By default, replace commons-logging with an empty jar for all dependencies.-->
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>99-empty</version>
</dependency>
<dependency> <!--Replace log4j with an empty jar by default. A module that really wants log4j must declare it with an explicit version.-->
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>99-empty</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.httpclient.HttpParser;
import org.apache.commons.lang.SerializationUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
Expand Down Expand Up @@ -51,7 +51,7 @@
@SuppressWarnings({ "unchecked", "deprecation" })
public class PersistLogMapper extends MapReduceBase implements
Mapper<Text, WritableArchiveRecord, Text, Text> {
private static final Log LOGGER = LogFactory.getLog(PersistLogMapper.class);
private static final Logger LOGGER = LoggerFactory.getLogger(PersistLogMapper.class);

ArchiveRecordHeader responseHeader = null;
CrawlURI curi = null;
Expand Down
75 changes: 16 additions & 59 deletions warc-hadoop-indexer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.1.0</version>
<version>3.3.0</version>
<configuration>
<descriptors>
<descriptor>src/main/assembly/hadoop-job.xml</descriptor>
Expand All @@ -46,6 +46,18 @@
</plugins>
</build>
<dependencies>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version.override}</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>

<dependency>
<groupId>uk.bl.wa.discovery</groupId>
<artifactId>warc-hadoop-recordreaders</artifactId>
Expand All @@ -64,66 +76,17 @@
<version>3.2.0-SNAPSHOT</version>
<exclusions>
<exclusion>
<artifactId>logback-classic</artifactId>
<groupId>ch.qos.logback</groupId>
</exclusion>
<exclusion>
<artifactId>logback-core</artifactId>
<groupId>ch.qos.logback</groupId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jul-to-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version.override}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version.override}</version>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
Expand All @@ -135,12 +98,6 @@
<artifactId>hadoop-test</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.mrunit</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
//import java.security.NoSuchAlgorithmException;
//import java.util.Properties;
//
//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
//import org.apache.hadoop.io.Text;
//import org.apache.log4j.PropertyConfigurator;
//import org.archive.io.ArchiveRecordHeader;
Expand All @@ -51,8 +51,8 @@
// */
//public class WebArchiveIndexerMapper extends
// SolrMapper<Text, WritableArchiveRecord> {
// private static final Log LOG = LogFactory
// .getLog(WebArchiveIndexerMapper.class);
// private static final Logger LOG = LoggerFactory
// .getLogger(WebArchiveIndexerMapper.class);
//
// private WARCIndexer windex;
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
Expand All @@ -53,6 +51,8 @@
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.bl.wa.hadoop.ArchiveFileInputFormat;
import uk.bl.wa.hadoop.mapred.FrequencyCountingReducer;
import uk.bl.wa.util.ConfigPrinter;
Expand All @@ -67,8 +67,7 @@

@SuppressWarnings({ "deprecation" })
public class WARCDatasetGenerator extends Configured implements Tool {
private static final Log LOG = LogFactory
.getLog(WARCDatasetGenerator.class);
private static final Logger LOG = LoggerFactory.getLogger(WARCDatasetGenerator.class);
private static final String CLI_USAGE = "[-i <input file>] [-o <output dir>] [-c <config file>] [-d] [Dump config.] [-w] [Wait for completion.] [-x] [output XML in OAI-PMH format]";
private static final String CLI_HEADER = "WARCDatasetGenerator - MapReduce method for extracing datasets from ARCs and WARCs";
public static final String CONFIG_PROPERTIES = "warc_indexer_config";
Expand Down
Loading

0 comments on commit 9276f6a

Please sign in to comment.