Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Groovy crawl configs #632

Merged
merged 1 commit into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions commons/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@
<artifactId>jsch</artifactId>
<version>0.2.21</version>
</dependency>
<dependency>
<groupId>org.apache.groovy</groupId>
<artifactId>groovy</artifactId>
<version>${groovy.version}</version>
</dependency>
</dependencies>
<build>
<resources>
Expand Down
46 changes: 44 additions & 2 deletions commons/src/main/java/org/archive/spring/PathSharingContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,15 @@
import org.apache.commons.io.FileUtils;
import org.archive.util.ArchiveUtils;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.BeanDefinitionStoreException;
import org.springframework.beans.factory.config.ConfigurableListableBeanFactory;
import org.springframework.beans.factory.groovy.GroovyBeanDefinitionReader;
import org.springframework.beans.factory.xml.XmlBeanDefinitionReader;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigUtils;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.EncodedResource;
import org.springframework.validation.BeanPropertyBindingResult;
import org.springframework.validation.Errors;
import org.springframework.validation.Validator;
Expand All @@ -46,9 +52,13 @@
*
* Notable extensions:
*
* Remembers its primary XML configuration file, and can report its filesystem
* Remembers its primary configuration file, and can report its filesystem
* path.
*
*
* Supports both Spring XML and Groovy Bean Definition DSL.
*
* Automatically enables annotation processing (&lt;context:annotation-config/&gt;).
*
* Reports a summary of Errors collected from self-Validating Beans.
*
* Generates launchId from timestamp, creates launch directory
Expand Down Expand Up @@ -212,5 +222,37 @@ public ConcurrentHashMap<Object, Object> getData() {
return data;
}

/**
 * Loads bean definitions from the configured resources and locations,
 * accepting both Spring XML and the Groovy Bean Definition DSL.
 *
 * <p>Files whose name ends in {@code .groovy} are parsed by a
 * {@link GroovyBeanDefinitionReader}; every other file (including Heritrix
 * {@code .cxml} configs) is delegated to the supplied XML reader.
 *
 * <p>Annotation-config processors are registered up front so individual
 * crawl configs need not declare {@code <context:annotation-config/>}.
 *
 * @param xmlReader the XML reader supplied by the superclass; all bean
 *        definitions are registered with its registry
 * @throws BeansException in case of loading or parsing errors
 * @throws IOException if a configuration resource cannot be read
 */
@Override
protected void loadBeanDefinitions(XmlBeanDefinitionReader xmlReader) throws BeansException, IOException {
    // This is essentially <context:annotation-config/>
    // By doing it here we don't need to include it in every crawl config.
    AnnotationConfigUtils.registerAnnotationConfigProcessors(xmlReader.getRegistry());

    GroovyBeanDefinitionReader groovyReader = new GroovyBeanDefinitionReader(xmlReader.getRegistry()) {
        // By default, the Groovy reader loads XML from .xml and Groovy for everything else, but
        // Heritrix uses .cxml so we override it to only use the Groovy reader for .groovy files
        // and the XML reader for everything else.
        @Override
        public int loadBeanDefinitions(EncodedResource encodedResource) throws BeanDefinitionStoreException {
            String filename = encodedResource.getResource().getFilename();
            if (filename != null && filename.endsWith(".groovy")) {
                return super.loadBeanDefinitions(encodedResource);
            }
            return xmlReader.loadBeanDefinitions(encodedResource);
        }
    };
    groovyReader.setEnvironment(getEnvironment());
    // The registry is a plain bean factory, not a ResourceLoader, so without this
    // the reader would resolve location strings with its own default loader rather
    // than with this context's (file-system oriented) resource resolution.
    groovyReader.setResourceLoader(this);

    Resource[] configResources = getConfigResources();
    if (configResources != null) {
        groovyReader.loadBeanDefinitions(configResources);
    }
    String[] configLocations = getConfigLocations();
    if (configLocations != null) {
        groovyReader.loadBeanDefinitions(configLocations);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.archive.spring;

import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;

import static org.junit.Assert.*;

/**
 * Verifies that {@link PathSharingContext} loads equivalent bean definitions
 * from a Groovy DSL config and from a Spring XML ({@code .cxml}) config, and
 * that {@code @Autowired} setters are honoured in both cases.
 */
public class PathSharingContextTest {
    /** Loads the Groovy fixture, whose bean1 is named "groovy". */
    @Test
    public void testGroovyConfig() {
        testConfig("groovy", "classpath:org/archive/spring/PathSharingContextTestBeans.groovy");
    }

    /** Loads the XML fixture, whose bean1 is named "xml". */
    @Test
    public void testXmlConfig() {
        testConfig("xml", "classpath:org/archive/spring/PathSharingContextTestBeans.cxml");
    }

    /**
     * Builds a context from {@code configPath} and checks validation, the
     * reported primary configuration path, property injection and autowiring.
     *
     * @param name expected value of {@code bean1.name} in the given config
     * @param configPath resource location of the config under test
     */
    private static void testConfig(String name, String configPath) {
        try (var context = new PathSharingContext(configPath)) {
            context.validate();
            assertTrue("should be no validation errors", context.getAllErrors().isEmpty());
            assertEquals("primaryConfigurationPath should be correct", configPath,
                    context.getPrimaryConfigurationPath());
            Bean1 bean1 = context.getBean("bean1", Bean1.class);
            Bean2 bean2 = context.getBean("bean2", Bean2.class);
            assertNotNull("bean1 should not be null", bean1);
            assertNotNull("bean2 should not be null", bean2);
            assertEquals("bean1.name should be set", name, bean1.name);
            // Autowiring must inject the very same instance, so check identity, not equality.
            assertSame("bean1 should be autowired into bean2", bean1, bean2.bean1);
        }
    }

    /** Simple bean whose {@code name} property is set by the config file. */
    public static class Bean1 {
        private String name;

        public void setName(String name) {
            this.name = name;
        }
    }

    /** Bean that receives {@link Bean1} through an {@code @Autowired} setter. */
    public static class Bean2 {
        private Bean1 bean1;

        @Autowired
        public void setBean1(Bean1 bean1) {
            this.bean1 = bean1;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring XML fixture for PathSharingContextTest.testXmlConfig.
  Defines the same two beans as the Groovy fixture; bean1's name is "xml".
-->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd">

<!-- Enables processing of annotations such as @Autowired on the beans below. -->
<context:annotation-config/>

<!-- bean1: the test asserts that its name property equals "xml". -->
<bean id="bean1" class="org.archive.spring.PathSharingContextTest$Bean1">
<property name="name" value="xml"/>
</bean>
<!-- bean2: no explicit properties; the test expects bean1 to arrive via its @Autowired setter. -->
<bean id="bean2" class="org.archive.spring.PathSharingContextTest$Bean2"/>
</beans>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Groovy Bean Definition DSL fixture for PathSharingContextTest.testGroovyConfig.
// Defines the same two beans as the .cxml fixture; bean1's name is "groovy".
import org.archive.spring.PathSharingContextTest

beans {
// bean1: the test asserts that its name property equals "groovy".
bean1(PathSharingContextTest.Bean1) {
name = "groovy"
}
// bean2: no explicit properties; the test expects bean1 to arrive via its @Autowired setter.
bean2(PathSharingContextTest.Bean2)
}
10 changes: 8 additions & 2 deletions engine/src/main/java/org/archive/crawler/framework/CrawlJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,14 @@ public void writeHtmlTo(PrintWriter pw, String uriPrefix) {
public void checkXML() {
// TODO: suppress check if XML unchanged? job.log when XML changed?

Instant testTime = Instant.ofEpochMilli(getPrimaryConfig().lastModified());
Document doc = getDomDocument(getPrimaryConfig());
File primaryConfig = getPrimaryConfig();
Instant testTime = Instant.ofEpochMilli(primaryConfig.lastModified());
if (primaryConfig.toString().endsWith(".groovy")) {
// just assume Groovy configs are OK
xmlOkAt = testTime;
return;
}
Document doc = getDomDocument(primaryConfig);
// TODO: check for other minimal requirements, like
// presence of a few key components (CrawlController etc.)?
if(doc!=null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public boolean addJobDirectory(File dir) {
}
File[] candidateConfigs = dir.listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(".cxml");
return name.endsWith(".cxml") || name.equals("crawler-beans.groovy");
}});
if(candidateConfigs==null || candidateConfigs.length == 0) {
// no CXML file found!
Expand Down
Loading
Loading