diff --git a/CHANGELOG.md b/CHANGELOG.md index 2778a2997f2..c5f85bae2f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,78 @@ +## v2.43.3 - 2023-10-30 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.43.2...v2.43.3) + +- [#6918](https://github.com/ORCID/ORCID-Source/pull/6918): 8889 prod looks like the ror loader is broken + +## v2.43.2 - 2023-10-25 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.43.1...v2.43.2) + +- [#6916](https://github.com/ORCID/ORCID-Source/pull/6916): fix: Invert selfAsserted and validated in sort by source + +### Fix + +- Invert selfAsserted and validated in sort by source + +## v2.43.1 - 2023-10-25 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.43.0...v2.43.1) + +- [#6914](https://github.com/ORCID/ORCID-Source/pull/6914): Shorter id + +## v2.43.0 - 2023-10-25 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.6...v2.43.0) + +- [#6913](https://github.com/ORCID/ORCID-Source/pull/6913): feature: Add functionality to capture user events in the database and… + +### Fix + +- Add missing label and fix test + +### Feature + +- Add functionality to capture user events in the database and create views to display info in panoply + +## v2.42.6 - 2023-10-24 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.5...v2.42.6) + +- [#6912](https://github.com/ORCID/ORCID-Source/pull/6912): add an option to configure timeout + +## v2.42.5 - 2023-10-23 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.4...v2.42.5) + +- [#6909](https://github.com/ORCID/ORCID-Source/pull/6909): fix: Update sort by source functionality to sort also alphabetically + +### Fix + +- Update external identifier is validates method +- Update sort by source functionality to sort also alphabetically + +## v2.42.4 - 2023-10-20 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.3...v2.42.4) + +- 
[#6907](https://github.com/ORCID/ORCID-Source/pull/6907): fix/remove-fundingSubType-core-details-from-codebase + +### Fix + +- remove fundingSubType core config manually removed from prod + +## v2.42.3 - 2023-10-20 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.2...v2.42.3) + +- [#6911](https://github.com/ORCID/ORCID-Source/pull/6911): Initial commit + +## v2.42.2 - 2023-10-20 + +[Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.1...v2.42.2) + +- [#6910](https://github.com/ORCID/ORCID-Source/pull/6910): 8855 duplicate isni external identifiers for same disambiguated organisation + ## v2.42.1 - 2023-10-13 [Full Changelog](https://github.com/ORCID/ORCID-Source/compare/v2.42.0...v2.42.1) diff --git a/orcid-api-common/src/main/resources/orcid-oauth2-api-common-config.xml b/orcid-api-common/src/main/resources/orcid-oauth2-api-common-config.xml index a97fca78480..89682ff1873 100644 --- a/orcid-api-common/src/main/resources/orcid-oauth2-api-common-config.xml +++ b/orcid-api-common/src/main/resources/orcid-oauth2-api-common-config.xml @@ -131,5 +131,6 @@ <constructor-arg index="1" value="${org.orcid.core.utils.cache.redis.port}" /> <constructor-arg index="2" value="${org.orcid.core.utils.cache.redis.password}" /> <constructor-arg index="3" value="${org.orcid.core.utils.cache.redis.expiration_in_secs:600}" /> + <constructor-arg index="4" value="${org.orcid.core.utils.cache.redis.connection_timeout_millis:10000}" /> </bean> </beans> \ No newline at end of file diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java index b6c7e310adb..167e25e2d2f 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/EmailDomainManager.java @@ -2,14 +2,17 @@ import java.util.List; +import 
org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.orcid.persistence.jpa.entities.EmailDomainEntity; public interface EmailDomainManager { EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category); - + boolean updateCategory(long id, EmailDomainEntity.DomainCategory category); EmailDomainEntity findByEmailDoman(String emailDomain); List<EmailDomainEntity> findByCategory(EmailDomainEntity.DomainCategory category); + + STATUS createOrUpdateEmailDomain(String emailDomain, String rorId); } diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/EventManager.java b/orcid-core/src/main/java/org/orcid/core/common/manager/EventManager.java new file mode 100644 index 00000000000..eaecd912705 --- /dev/null +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/EventManager.java @@ -0,0 +1,19 @@ +package org.orcid.core.common.manager; + +import org.orcid.core.utils.EventType; +import org.orcid.pojo.ajaxForm.RequestInfoForm; + +import javax.servlet.http.HttpServletRequest; + +/** + * + * @author Daniel Palafox + * + */ +public interface EventManager { + + boolean removeEvents(String orcid); + + void createEvent(String orcid, EventType eventType, HttpServletRequest request, RequestInfoForm requestInfoForm); + +} diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java index b5950fb9f91..dcfe88f952b 100644 --- a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EmailDomainManagerImpl.java @@ -13,20 +13,26 @@ public class EmailDomainManagerImpl implements EmailDomainManager { + public enum STATUS {CREATED, UPDATED}; + @Resource(name = "emailDomainDao") private EmailDomainDao emailDomainDao; @Resource(name = "emailDomainDaoReadOnly") private EmailDomainDao 
emailDomainDaoReadOnly; - @Override - public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category) { + private void validateEmailDomain(String emailDomain) { if (emailDomain == null || emailDomain.isBlank()) { throw new IllegalArgumentException("Email Domain must not be empty"); } if(!InternetDomainName.isValid(emailDomain)) { throw new IllegalArgumentException("Email Domain '" + emailDomain + "' is invalid"); } + } + + @Override + public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category) { + validateEmailDomain(emailDomain); if (category == null) { throw new IllegalArgumentException("Category must not be empty"); } @@ -57,4 +63,22 @@ public List<EmailDomainEntity> findByCategory(DomainCategory category) { return emailDomainDaoReadOnly.findByCategory(category); } + @Override + public STATUS createOrUpdateEmailDomain(String emailDomain, String rorId) { + EmailDomainEntity existingEntity = emailDomainDaoReadOnly.findByEmailDoman(emailDomain); + if(existingEntity != null) { + if(!rorId.equals(existingEntity.getRorId())) { + boolean updated = emailDomainDao.updateRorId(existingEntity.getId(), rorId); + if(updated) + return STATUS.UPDATED; + } + } else { + EmailDomainEntity newEntity = emailDomainDao.createEmailDomain(emailDomain, DomainCategory.PROFESSIONAL, rorId); + if (newEntity != null) { + return STATUS.CREATED; + } + } + return null; + } + } diff --git a/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EventManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EventManagerImpl.java new file mode 100644 index 00000000000..de62d256865 --- /dev/null +++ b/orcid-core/src/main/java/org/orcid/core/common/manager/impl/EventManagerImpl.java @@ -0,0 +1,97 @@ +package org.orcid.core.common.manager.impl; + +import javax.annotation.Resource; +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.lang.StringUtils; +import 
org.orcid.core.common.manager.EventManager; +import org.orcid.core.constants.OrcidOauth2Constants; +import org.orcid.core.manager.ClientDetailsEntityCacheManager; +import org.orcid.core.utils.EventType; +import org.orcid.persistence.dao.EventDao; +import org.orcid.persistence.jpa.entities.ClientDetailsEntity; +import org.orcid.persistence.jpa.entities.EventEntity; +import org.orcid.pojo.ajaxForm.RequestInfoForm; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; + +/** + * + * @author Daniel Palafox + * + */ +public class EventManagerImpl implements EventManager { + + @Resource + private EventDao eventDao; + + @Resource + private ClientDetailsEntityCacheManager clientDetailsEntityCacheManager; + + @Override + public boolean removeEvents(String orcid) { + return eventDao.removeEvents(orcid); + } + + @Override + public void createEvent(String orcid, EventType eventType, HttpServletRequest request, RequestInfoForm requestInfoForm) { + String label = "Website"; + String clientId = null; + String redirectUrl = null; + String publicPage = null; + + switch (eventType) { + case PUBLIC_PAGE: + publicPage = orcid; + orcid = null; + break; + case REAUTHORIZE: + clientId = requestInfoForm.getClientId(); + redirectUrl = requestInfoForm.getRedirectUrl(); + label = "OAuth " + requestInfoForm.getClientName(); + break; + default: + if (request != null) { + Boolean isOauth2ScreensRequest = (Boolean) request.getSession().getAttribute(OrcidOauth2Constants.OAUTH_2SCREENS); + if (isOauth2ScreensRequest != null && isOauth2ScreensRequest) { + String queryString = (String) request.getSession().getAttribute(OrcidOauth2Constants.OAUTH_QUERY_STRING); + clientId = getParameterValue(queryString, "client_id"); + redirectUrl = getParameterValue(queryString, "redirect_uri"); + ClientDetailsEntity clientDetailsEntity = clientDetailsEntityCacheManager.retrieve(clientId); + label = "OAuth " + clientDetailsEntity.getClientName(); + } 
+ } + } + + EventEntity eventEntity = new EventEntity(); + + eventEntity.setOrcid(orcid); + eventEntity.setEventType(eventType.getValue()); + eventEntity.setClientId(clientId); + eventEntity.setRedirectUrl(redirectUrl); + eventEntity.setLabel(label); + eventEntity.setPublicPage(publicPage); + + eventDao.createEvent(eventEntity); + } + + private String getParameterValue(String queryString, String parameter) { + if (StringUtils.isNotEmpty(queryString)) { + try { + queryString = URLDecoder.decode(queryString, StandardCharsets.UTF_8.toString()); + } catch (UnsupportedEncodingException u) { + // l + } + String[] parameters = queryString.split("&"); + for (String p : parameters) { + String[] keyValuePair = p.split("="); + if (parameter.equals(keyValuePair[0])) { + return keyValuePair[1]; + } + } + } + return null; + } +} diff --git a/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java b/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java index 3dd8c72cad0..76851b41b23 100644 --- a/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java +++ b/orcid-core/src/main/java/org/orcid/core/manager/OrgDisambiguatedManager.java @@ -36,5 +36,7 @@ public interface OrgDisambiguatedManager { void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier); public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type); + + public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity); } diff --git a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java index 2a074b34149..03faaeb9208 100644 --- a/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java +++ 
b/orcid-core/src/main/java/org/orcid/core/manager/impl/OrgDisambiguatedManagerImpl.java @@ -11,6 +11,7 @@ import javax.annotation.Resource; import javax.transaction.Transactional; +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.orcid.core.manager.OrgDisambiguatedManager; import org.orcid.core.messaging.JmsMessageSender; @@ -52,7 +53,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Resource private OrgDao orgDao; - + @Resource private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao; @@ -68,6 +69,9 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Value("${org.orcid.persistence.messaging.updated.disambiguated_org.solr:indexDisambiguatedOrgs}") private String updateSolrQueueName; + @Value("${org.orcid.core.cleanExtIdsForOrg:false}") + private boolean cleanDuplicateExtIdForOrg; + @Resource(name = "jmsMessageSender") private JmsMessageSender messaging; @@ -76,7 +80,7 @@ public class OrgDisambiguatedManagerImpl implements OrgDisambiguatedManager { @Value("${org.orcid.persistence.messaging.updated.disambiguated_org.indexing.batchSize:1000}") private int indexingBatchSize; - + @Override synchronized public void processOrgsForIndexing() { LOGGER.info("About to process disambiguated orgs for indexing"); @@ -103,7 +107,7 @@ synchronized public void markOrgsForIndexingAsGroup() { entities = orgDisambiguatedDaoReadOnly.findOrgsToGroup(startIndex, indexingBatchSize); LOGGER.info("GROUP: Found chunk of {} disambiguated orgs for indexing as group", entities.size()); for (OrgDisambiguatedEntity entity : entities) { - + new OrgGrouping(entity, this).markGroupForIndexing(orgDisambiguatedDao); } startIndex = startIndex + indexingBatchSize; @@ -147,7 +151,7 @@ private OrgDisambiguatedSolrDocument convertEntityToDocument(OrgDisambiguatedEnt document.setOrgDisambiguatedPopularity(entity.getPopularity()); Set<String> orgNames = new HashSet<>(); 
orgNames.add(entity.getName()); - + List<OrgEntity> orgs = orgDao.findByOrgDisambiguatedId(entity.getId()); if (orgs != null) { for (OrgEntity org : orgs) { @@ -219,6 +223,9 @@ public List<OrgDisambiguated> searchOrgsFromSolrForSelfService(String searchTerm @Override public OrgDisambiguatedEntity updateOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) { normalizeExternalIdentifiers(orgDisambiguatedEntity); + if (cleanDuplicateExtIdForOrg) { + cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity); + } return orgDisambiguatedDao.merge(orgDisambiguatedEntity); } @@ -262,7 +269,27 @@ public OrgDisambiguated findInDB(String idValue, String idType) { @Override public void createOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIdentifierEntity identifier) { normalizeExternalIdentifier(identifier); - orgDisambiguatedExternalIdentifierDao.persist(identifier); + boolean toPersist = true; + OrgDisambiguatedEntity orgDisambiguatedEntity = identifier.getOrgDisambiguated(); + if (orgDisambiguatedEntity != null && orgDisambiguatedEntity.getExternalIdentifiers() != null) { + String extIdentifierKeyToAdd = identifier.getIdentifierType() + "::" + identifier.getIdentifier(); + String extIdentifierKey; + for (OrgDisambiguatedExternalIdentifierEntity identifier1 : orgDisambiguatedEntity.getExternalIdentifiers()) { + extIdentifierKey = identifier1.getIdentifierType() + "::" + identifier1.getIdentifier(); + if (StringUtils.equals(extIdentifierKeyToAdd, extIdentifierKey)) { + toPersist = false; + break; + } + } + } + if (cleanDuplicateExtIdForOrg) { + cleanDuplicatedExternalIdentifiersForOrgDisambiguated(orgDisambiguatedEntity); + } + // check if in the current external id list the identifier already + if (toPersist) { + orgDisambiguatedExternalIdentifierDao.persist(identifier); + } + } @Override @@ -271,17 +298,16 @@ public void updateOrgDisambiguatedExternalIdentifier(OrgDisambiguatedExternalIde 
orgDisambiguatedExternalIdentifierDao.merge(identifier); } - public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier( String identifier, String type ) { + public List<OrgDisambiguated> findOrgDisambiguatedIdsForSameExternalIdentifier(String identifier, String type) { List<OrgDisambiguated> orgDisambiguatedIds = new ArrayList<OrgDisambiguated>(); List<OrgDisambiguatedExternalIdentifierEntity> extIds = orgDisambiguatedExternalIdentifierDao.findByIdentifierIdAndType(identifier, type); - extIds.stream().forEach((e) -> - { - OrgDisambiguatedEntity de = e.getOrgDisambiguated(); - // Group only if it is not a RINGGOLD org - if(de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) { - orgDisambiguatedIds.add(convertEntity(de)); - } - }); + extIds.stream().forEach((e) -> { + OrgDisambiguatedEntity de = e.getOrgDisambiguated(); + // Group only if it is not a RINGGOLD org + if (de != null && !OrgDisambiguatedSourceType.RINGGOLD.name().equals(de.getSourceType())) { + orgDisambiguatedIds.add(convertEntity(de)); + } + }); return orgDisambiguatedIds; } @@ -347,4 +373,39 @@ private void normalizeExternalIdentifiers(OrgDisambiguatedEntity orgDisambiguate } } + public void cleanDuplicatedExternalIdentifiersForOrgDisambiguated(OrgDisambiguatedEntity orgDisambiguatedEntity) { + if (orgDisambiguatedEntity.getExternalIdentifiers() != null) { + HashMap<String, OrgDisambiguatedExternalIdentifierEntity> extIdsMapping = new HashMap<String, OrgDisambiguatedExternalIdentifierEntity>(); + String extIdentifierKey; + OrgDisambiguatedExternalIdentifierEntity mappedExtIdentifier; + List<OrgDisambiguatedExternalIdentifierEntity> duplicatedExtIdentifiersToBeRemoved = new ArrayList<OrgDisambiguatedExternalIdentifierEntity>(); + for (OrgDisambiguatedExternalIdentifierEntity identifier : orgDisambiguatedEntity.getExternalIdentifiers()) { + extIdentifierKey = identifier.getIdentifierType() + "::" + identifier.getIdentifier(); + if 
(extIdsMapping.containsKey(extIdentifierKey)) { + + if (!identifier.getPreferred()) { + duplicatedExtIdentifiersToBeRemoved.add(identifier); + } else { + mappedExtIdentifier = extIdsMapping.get(extIdentifierKey); + duplicatedExtIdentifiersToBeRemoved.add(mappedExtIdentifier); + extIdsMapping.put(extIdentifierKey, identifier); + } + + } + } + // remove the duplicates from DB + + LOGGER.info( + "About to remove " + duplicatedExtIdentifiersToBeRemoved.size() + " duplicate external Ids for Disambiguated Org " + orgDisambiguatedEntity.getId()); + duplicatedExtIdentifiersToBeRemoved.stream().forEach((e) -> { + try { + orgDisambiguatedExternalIdentifierDao.remove(e); + LOGGER.debug("Removed ext id " + e.getIdentifierType() + "::" + e.getIdentifier() + "::" + e.getId()); + } catch (Exception ex) { + LOGGER.error("Exception when removing duplicate external ids for Disambiguated Org " + orgDisambiguatedEntity.getId(), ex); + } + }); + + } + } } diff --git a/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java b/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java index e30eaf806fb..be4c57fa21a 100644 --- a/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java +++ b/orcid-core/src/main/java/org/orcid/core/solr/OrcidSolrOrgsClient.java @@ -33,7 +33,7 @@ public class OrcidSolrOrgsClient { private static final String SOLR_SELF_SERVICE_ORGS_QUERY = "(org-disambiguated-id-from-source:%s)^50.0 (org-disambiguated-name%s)^50.0 (org-disambiguated-name-string:%s)^25.0"; - + private static final String SOLR_ORG_BY_ROR_ID_QUERY = "org-disambiguated-id-from-source:%s"; public OrgDisambiguatedSolrDocument findById(Long id) { SolrQuery query = new SolrQuery(); @@ -93,4 +93,19 @@ public List<OrgDisambiguatedSolrDocument> getOrgsForSelfService(String searchTer throw new NonTransientDataAccessResourceException(errorMessage, se); } } + + public OrgDisambiguatedSolrDocument getOrgByRorId(String rorId) { + SolrQuery query = new SolrQuery(); + // 
Escape the : on the email domain to be able to search in solr + query.setQuery(SOLR_ORG_BY_ROR_ID_QUERY.replace("%s", rorId.replace(":", "\\:"))); + query.addOrUpdateSort("score", ORDER.desc); + try { + QueryResponse queryResponse = solrReadOnlyOrgsClient.query(query); + List<OrgDisambiguatedSolrDocument> result = queryResponse.getBeans(OrgDisambiguatedSolrDocument.class); + return (result == null || result.isEmpty()) ? null : result.get(0); + } catch (SolrServerException | IOException se) { + String errorMessage = MessageFormat.format("Error when attempting to search for orgs by ror id, with ror id {0}", new Object[] { rorId }); + throw new NonTransientDataAccessResourceException(errorMessage, se); + } + } } diff --git a/orcid-core/src/main/java/org/orcid/core/togglz/Features.java b/orcid-core/src/main/java/org/orcid/core/togglz/Features.java index df17060321d..dbb655fab98 100644 --- a/orcid-core/src/main/java/org/orcid/core/togglz/Features.java +++ b/orcid-core/src/main/java/org/orcid/core/togglz/Features.java @@ -5,6 +5,9 @@ import org.togglz.core.context.FeatureContext; public enum Features implements Feature { + @Label("Track user events") + EVENTS, + @Label("Source sorting") SOURCE_SORTING, diff --git a/orcid-core/src/main/java/org/orcid/core/utils/EventType.java b/orcid-core/src/main/java/org/orcid/core/utils/EventType.java new file mode 100644 index 00000000000..d5c248dd171 --- /dev/null +++ b/orcid-core/src/main/java/org/orcid/core/utils/EventType.java @@ -0,0 +1,20 @@ +package org.orcid.core.utils; + +public enum EventType { + SIGN_IN("Sign-In"), + NEW_REGISTRATION("New-Registration"), + AUTHORIZE("Authorize"), + AUTHORIZE_DENY("Authorize-Deny"), + REAUTHORIZE("Reauthorize"), + PUBLIC_PAGE("Public-Page"); + + private final String value; + + EventType(String v) { + value = v; + } + + public String getValue() { + return value; + } +} diff --git a/orcid-core/src/main/java/org/orcid/core/utils/cache/redis/RedisClient.java 
b/orcid-core/src/main/java/org/orcid/core/utils/cache/redis/RedisClient.java index 42affcf8f47..bc09b7f234d 100644 --- a/orcid-core/src/main/java/org/orcid/core/utils/cache/redis/RedisClient.java +++ b/orcid-core/src/main/java/org/orcid/core/utils/cache/redis/RedisClient.java @@ -11,6 +11,7 @@ import org.orcid.utils.alerting.SlackManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; import redis.clients.jedis.DefaultJedisClientConfig; import redis.clients.jedis.HostAndPort; @@ -24,7 +25,7 @@ public class RedisClient { private static final Logger LOG = LoggerFactory.getLogger(RedisClient.class); private static final int DEFAULT_CACHE_EXPIRY = 60; - private static final int DEFAULT_TIMEOUT = 5000; + private static final int DEFAULT_TIMEOUT = 10000; private final String redisHost; private final int redisPort; @@ -37,7 +38,7 @@ public class RedisClient { @Resource private SlackManager slackManager; - // Assume the connection to Redis is disabled by default + // Assume the connection to Redis is disabled by default private boolean enabled = false; public RedisClient(String redisHost, int redisPort, String password) { @@ -66,10 +67,6 @@ public RedisClient(String redisHost, int redisPort, String password, int cacheEx @PostConstruct private void init() { - if(!enabled) { - LOG.debug("Redis is not enabled, so, it will not be initilized"); - return; - } try { JedisClientConfig config = DefaultJedisClientConfig.builder().connectionTimeoutMillis(this.clientTimeoutInMillis).timeoutMillis(this.clientTimeoutInMillis) .socketTimeoutMillis(this.clientTimeoutInMillis).password(this.redisPassword).ssl(true).build(); diff --git a/orcid-core/src/main/java/org/orcid/core/utils/v3/SourceUtils.java b/orcid-core/src/main/java/org/orcid/core/utils/v3/SourceUtils.java index 04f10cac2bc..35c8dc4dc1d 100644 --- a/orcid-core/src/main/java/org/orcid/core/utils/v3/SourceUtils.java +++ 
b/orcid-core/src/main/java/org/orcid/core/utils/v3/SourceUtils.java @@ -316,11 +316,7 @@ public static boolean isSelfAsserted(Source source, String orcid) { assertionOriginOrcid = source.getAssertionOriginOrcid().getPath(); } // If the affiliation source is the user himself or any member with OBO, then, it is considered self asserted - if(orcid.equals(sourceId) || orcid.equals(assertionOriginOrcid)) { - return false; - } else { - return true; - } + return orcid.equals(sourceId) || orcid.equals(assertionOriginOrcid); } public static boolean isSelfAsserted(AffiliationForm af, String orcid) { diff --git a/orcid-core/src/main/java/org/orcid/core/utils/v3/activities/FundingComparators.java b/orcid-core/src/main/java/org/orcid/core/utils/v3/activities/FundingComparators.java index 3f24dce975d..ae0864cd3ab 100644 --- a/orcid-core/src/main/java/org/orcid/core/utils/v3/activities/FundingComparators.java +++ b/orcid-core/src/main/java/org/orcid/core/utils/v3/activities/FundingComparators.java @@ -2,6 +2,9 @@ import java.util.Collections; import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.orcid.core.utils.v3.SourceUtils; import org.orcid.pojo.ajaxForm.FundingForm; @@ -15,16 +18,8 @@ public class FundingComparators { private final String TYPE_SORT_KEY = "type"; - private final String SOURCE_SORT_KEY = "source"; - - private String orcid = null; - public FundingComparators() {} - public FundingComparators(String orcid) { - this.orcid = orcid; - } - public Comparator<FundingGroup> getInstance(String key, boolean sortAsc, String orcid) { Comparator<FundingGroup> comparator = null; if (DATE_SORT_KEY.equals(key)) { @@ -33,8 +28,6 @@ public Comparator<FundingGroup> getInstance(String key, boolean sortAsc, String comparator = new FundingComparators().TITLE_COMPARATOR; } else if (TYPE_SORT_KEY.equals(key)) { comparator = new FundingComparators().TYPE_COMPARATOR; - } else if (SOURCE_SORT_KEY.equals(key)) 
{ - comparator = new FundingComparators(orcid).SOURCE_COMPARATOR; } if (sortAsc) { @@ -107,9 +100,19 @@ public Comparator<FundingGroup> getInstance(String key, boolean sortAsc, String return g1.getStartDate().compareTo(g2.getStartDate()); }; - public Comparator<FundingGroup> SOURCE_COMPARATOR = (g1, g2) -> Boolean.compare(isSelfAsserted(g1), isSelfAsserted(g2)); + public List<FundingGroup> sortBySource(List<FundingGroup> fundingGroups, boolean sortAsc, String orcid) { + List<FundingGroup> selfAsserted = fundingGroups.stream() + .filter(fundingGroup -> SourceUtils.isSelfAsserted(fundingGroup.getDefaultFunding(), orcid)) + .collect(Collectors.toList()); + + List<FundingGroup> validated = fundingGroups.stream() + .filter(fundingGroup -> !SourceUtils.isSelfAsserted(fundingGroup.getDefaultFunding(), orcid)) + .collect(Collectors.toList()); + + selfAsserted.sort(new FundingComparators().TITLE_COMPARATOR); + validated.sort(new FundingComparators().TITLE_COMPARATOR); - private boolean isSelfAsserted(FundingGroup fundingGroup) { - return SourceUtils.isSelfAsserted(fundingGroup.getDefaultFunding(), orcid); + return (sortAsc ? 
Stream.concat(validated.stream(), selfAsserted.stream()) : Stream.concat(selfAsserted.stream(), validated.stream())) + .collect(Collectors.toList()); } } diff --git a/orcid-core/src/main/java/org/orcid/pojo/summary/ExternalIdentifiersSummary.java b/orcid-core/src/main/java/org/orcid/pojo/summary/ExternalIdentifiersSummary.java index d4593ba47a7..61fab224d86 100644 --- a/orcid-core/src/main/java/org/orcid/pojo/summary/ExternalIdentifiersSummary.java +++ b/orcid-core/src/main/java/org/orcid/pojo/summary/ExternalIdentifiersSummary.java @@ -86,7 +86,7 @@ public static ExternalIdentifiersSummary valueOf(PersonExternalIdentifier person } if (personExternalIdentifier.getSource() != null) { - form.setValidated(SourceUtils.isSelfAsserted(personExternalIdentifier.getSource(), orcid)); + form.setValidated(!SourceUtils.isSelfAsserted(personExternalIdentifier.getSource(), orcid)); } } return form; diff --git a/orcid-core/src/main/resources/orcid-core-context.xml b/orcid-core/src/main/resources/orcid-core-context.xml index 76f109dc3ca..5b0c52ac219 100644 --- a/orcid-core/src/main/resources/orcid-core-context.xml +++ b/orcid-core/src/main/resources/orcid-core-context.xml @@ -1212,8 +1212,11 @@ <constructor-arg index="1" value="${org.orcid.core.utils.cache.redis.port}" /> <constructor-arg index="2" value="${org.orcid.core.utils.cache.redis.password}" /> <constructor-arg index="3" value="${org.orcid.core.utils.cache.redis.expiration_in_secs:600}" /> + <constructor-arg index="4" value="${org.orcid.core.utils.cache.redis.connection_timeout_millis:10000}" /> </bean> + <bean id="eventManager" class="org.orcid.core.common.manager.impl.EventManagerImpl"/> + <bean id="emailDomainManager" class="org.orcid.core.common.manager.impl.EmailDomainManagerImpl"/> </beans> diff --git a/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java b/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java index bc347b9977f..de67c61336a 100644 --- 
a/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java +++ b/orcid-core/src/test/java/org/orcid/core/common/manager/EmailDomainManagerTest.java @@ -4,7 +4,11 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -16,6 +20,7 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.orcid.core.common.manager.impl.EmailDomainManagerImpl; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; import org.orcid.persistence.dao.EmailDomainDao; import org.orcid.persistence.jpa.entities.EmailDomainEntity; import org.orcid.persistence.jpa.entities.EmailDomainEntity.DomainCategory; @@ -38,12 +43,17 @@ public void before() { EmailDomainEntity e1 = new EmailDomainEntity("gmail.com", DomainCategory.PERSONAL); EmailDomainEntity e2 = new EmailDomainEntity("yahoo.com", DomainCategory.PERSONAL); - EmailDomainEntity e3 = new EmailDomainEntity("orcid.org", DomainCategory.PROFESSIONAL); + EmailDomainEntity e3 = new EmailDomainEntity("orcid.org", DomainCategory.PROFESSIONAL, "https://ror.org/04fa4r544"); + e3.setId(1000L); when(emailDomainDaoReadOnlyMock.findByCategory(eq(DomainCategory.PERSONAL))).thenReturn(List.of(e1, e2)); when(emailDomainDaoReadOnlyMock.findByCategory(eq(DomainCategory.PROFESSIONAL))).thenReturn(List.of(e3)); when(emailDomainDaoReadOnlyMock.findByEmailDoman("gmail.com")).thenReturn(e1); + when(emailDomainDaoReadOnlyMock.findByEmailDoman("orcid.org")).thenReturn(e3); + + when(emailDomainDaoMock.createEmailDomain(eq("new.domain"), 
eq(DomainCategory.PROFESSIONAL), eq("https://ror.org/0"))).thenReturn(new EmailDomainEntity("new.domain", DomainCategory.PROFESSIONAL, "https://ror.org/0")); + when(emailDomainDaoMock.updateRorId(1000L, "https://ror.org/0")).thenReturn(true); } @Test(expected = IllegalArgumentException.class) @@ -135,4 +145,28 @@ public void findByCategory_TwoResultsTest() { assertEquals(DomainCategory.PERSONAL, personal.get(1).getCategory()); assertEquals("yahoo.com", personal.get(1).getEmailDomain()); } + + @Test + public void createOrUpdateEmailDomain_CreateTest() { + STATUS s = edm.createOrUpdateEmailDomain("new.domain", "https://ror.org/0"); + assertEquals(STATUS.CREATED, s); + verify(emailDomainDaoMock, times(1)).createEmailDomain(eq("new.domain"), eq(DomainCategory.PROFESSIONAL), eq("https://ror.org/0")); + verify(emailDomainDaoMock, never()).updateRorId(anyLong(), anyString()); + } + + @Test + public void createOrUpdateEmailDomain_UpdateTest() { + STATUS s = edm.createOrUpdateEmailDomain("orcid.org", "https://ror.org/0"); + assertEquals(STATUS.UPDATED, s); + verify(emailDomainDaoMock, times(1)).updateRorId(eq(1000L), eq("https://ror.org/0")); + verify(emailDomainDaoMock, never()).createEmailDomain(anyString(), any(), anyString()); + } + + @Test + public void createOrUpdateEmailDomain_NoUpdatesTest() { + STATUS s = edm.createOrUpdateEmailDomain("orcid.org", "https://ror.org/04fa4r544"); + assertNull(s); + verify(emailDomainDaoMock, never()).updateRorId(anyLong(), anyString()); + verify(emailDomainDaoMock, never()).createEmailDomain(anyString(), any(), anyString()); + } } diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java index bb5c5730a53..93ea7a3bc35 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EmailDomainDao.java @@ -6,8 +6,12 @@ public interface 
EmailDomainDao extends GenericDao<EmailDomainEntity, Long> { EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category); + + EmailDomainEntity createEmailDomain(String emailDomain, EmailDomainEntity.DomainCategory category, String rorId); boolean updateCategory(long id, EmailDomainEntity.DomainCategory category); + + boolean updateRorId(long id, String rorId); EmailDomainEntity findByEmailDoman(String emailDomain); diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/EventDao.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EventDao.java new file mode 100644 index 00000000000..99787a759c5 --- /dev/null +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/EventDao.java @@ -0,0 +1,20 @@ +package org.orcid.persistence.dao; + +import org.orcid.persistence.jpa.entities.EventEntity; + +import java.util.List; + +/** + * + * @author Daniel Palafox + * + */ +public interface EventDao extends GenericDao<EventEntity, Long>{ + + boolean removeEvents(String orcid); + + List<EventEntity> getEvents(String orcid); + + void createEvent(EventEntity eventEntity); + +} diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java index 5a8416d1e3c..6c75eec184f 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EmailDomainDaoImpl.java @@ -31,6 +31,18 @@ public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory ca entityManager.persist(e); return e; } + + @Override + @Transactional + public EmailDomainEntity createEmailDomain(String emailDomain, DomainCategory category, String rorId) { + LOG.debug("Creating domain {} with category {} and ror Id {}", emailDomain, category, rorId); + EmailDomainEntity e = new EmailDomainEntity(); + 
e.setEmailDomain(emailDomain); + e.setCategory(category); + e.setRorId(rorId); + entityManager.persist(e); + return e; + } @Override @Transactional @@ -42,6 +54,16 @@ public boolean updateCategory(long id, DomainCategory category) { return query.executeUpdate() > 0; } + @Override + @Transactional + public boolean updateRorId(long id, String rorId) { + LOG.debug("Updating domain with id {} with rorId {}", id, rorId); + Query query = entityManager.createNativeQuery("UPDATE email_domain SET ror_id=:rorId WHERE id = :id"); + query.setParameter("id", id); + query.setParameter("rorId", rorId.toString()); + return query.executeUpdate() > 0; + } + @Override public EmailDomainEntity findByEmailDoman(String emailDomain) { TypedQuery<EmailDomainEntity> query = entityManager.createQuery("from EmailDomainEntity where emailDomain = :emailDomain", EmailDomainEntity.class); diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EventDaoImpl.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EventDaoImpl.java new file mode 100644 index 00000000000..2d74fee4168 --- /dev/null +++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/EventDaoImpl.java @@ -0,0 +1,45 @@ +package org.orcid.persistence.dao.impl; + +import org.orcid.persistence.aop.UpdateProfileLastModified; +import org.orcid.persistence.dao.EventDao; +import org.orcid.persistence.jpa.entities.EmailEntity; +import org.orcid.persistence.jpa.entities.EventEntity; +import org.orcid.persistence.jpa.entities.SpamEntity; +import org.springframework.transaction.annotation.Transactional; + +import javax.persistence.Query; +import javax.persistence.TypedQuery; +import java.util.List; + +/** + * @author Daniel Palafox + */ +public class EventDaoImpl extends GenericDaoImpl<EventEntity, Long> implements EventDao { + + public EventDaoImpl() { + super(EventEntity.class); + } + + @Override + public List<EventEntity> getEvents(String orcid) { + TypedQuery<EventEntity> query = 
entityManager.createQuery("from EventEntity where orcid=:orcid", EventEntity.class); + query.setParameter("orcid", orcid); + List<EventEntity> results = query.getResultList(); + return results.isEmpty() ? null : results; + } + + @Override + @Transactional + public void createEvent(EventEntity eventEntity) { + entityManager.persist(eventEntity); + } + + @Override + @Transactional + public boolean removeEvents(String orcid) { + Query query = entityManager.createQuery("delete from EventEntity where orcid = :orcid"); + query.setParameter("orcid", orcid); + int deleted = query.executeUpdate(); /* run the delete exactly once; a repeated call finds no rows left and would always report 0 */ + return deleted > 0; + } +} diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java index 06ea938e449..c676f19cea8 100644 --- a/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java +++ b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EmailDomainEntity.java @@ -28,6 +28,7 @@ public static enum DomainCategory {PERSONAL, PROFESSIONAL, UNDEFINED} private Long id; private String emailDomain; private DomainCategory category; + private String rorId; public EmailDomainEntity() { @@ -38,6 +39,12 @@ public EmailDomainEntity(String emailDomain, DomainCategory category) { this.category = category; } + public EmailDomainEntity(String emailDomain, DomainCategory category, String rorId) { + this.emailDomain = emailDomain; + this.category = category; + this.rorId = rorId; + } + @Override @Id @GeneratedValue(strategy = GenerationType.AUTO, generator = "email_domain_seq") @@ -70,9 +77,18 @@ public void setEmailDomain(String emailDomain) { this.emailDomain = emailDomain; } + @Column(name = "ror_id") + public String getRorId() { + return rorId; + } + + public void setRorId(String rorId) { + this.rorId = rorId; + } + @Override public int hashCode() { - return Objects.hash(category, emailDomain, id); + return
Objects.hash(category, emailDomain, id, rorId); } @Override @@ -84,6 +100,6 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) return false; EmailDomainEntity other = (EmailDomainEntity) obj; - return category == other.category && Objects.equals(emailDomain, other.emailDomain) && Objects.equals(id, other.id); - } + return category == other.category && Objects.equals(emailDomain, other.emailDomain) && Objects.equals(id, other.id) && Objects.equals(rorId, other.rorId); + } } diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EventEntity.java b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EventEntity.java new file mode 100644 index 00000000000..019a710041c --- /dev/null +++ b/orcid-persistence/src/main/java/org/orcid/persistence/jpa/entities/EventEntity.java @@ -0,0 +1,73 @@ +package org.orcid.persistence.jpa.entities; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.SequenceGenerator; +import javax.persistence.Table; + +/** + * + * @author Daniel Palafox + * + */ +@Entity +@Table(name = "event") +public class EventEntity extends BaseEntity<Long> implements OrcidAware { + private static final long serialVersionUID = 1L; + private Long id; + private String orcid; + private String eventType; + private String clientId; + private String redirectUrl; + private String label; + private String publicPage; + + @Id + @Column(name = "id") + @GeneratedValue(strategy = GenerationType.AUTO, generator = "event_seq") + @SequenceGenerator(name = "event_seq", sequenceName = "event_seq", allocationSize = 1) + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @Column(name = "orcid") + public String getOrcid() { + return orcid; + } + + public void setOrcid(String orcid) { + this.orcid = orcid; + } + + @Column(name 
= "event_type") + public String getEventType() { return eventType; } + + public void setEventType(String eventType) { this.eventType = eventType; } + + @Column(name = "client_id") + public String getClientId() { return clientId; } + + public void setClientId(String client_id) { this.clientId = client_id; } + + @Column(name = "redirect_url") + public String getRedirectUrl() { return redirectUrl; } + + public void setRedirectUrl(String redirect_url) { this.redirectUrl = redirect_url; } + + @Column(name = "label") + public String getLabel() { return label; } + + public void setLabel(String label) { this.label = label; } + + @Column(name = "public_page") + public String getPublicPage() { return publicPage; } + + public void setPublicPage(String public_page) { this.publicPage = public_page; } +} diff --git a/orcid-persistence/src/main/resources/META-INF/persistence.xml b/orcid-persistence/src/main/resources/META-INF/persistence.xml index 97491923958..c3af43f7ae1 100644 --- a/orcid-persistence/src/main/resources/META-INF/persistence.xml +++ b/orcid-persistence/src/main/resources/META-INF/persistence.xml @@ -60,6 +60,7 @@ <class>org.orcid.persistence.jpa.entities.ResearchResourceItemEntity</class> <class>org.orcid.persistence.jpa.entities.FindMyStuffHistoryEntity</class> <class>org.orcid.persistence.jpa.entities.SpamEntity</class> + <class>org.orcid.persistence.jpa.entities.EventEntity</class> <class>org.orcid.persistence.jpa.entities.EmailDomainEntity</class> <!-- OAuth entities --> @@ -105,4 +106,4 @@ </persistence-unit> -</persistence> \ No newline at end of file +</persistence> diff --git a/orcid-persistence/src/main/resources/db-master.xml b/orcid-persistence/src/main/resources/db-master.xml index 7949aeaa856..f2c56e0432d 100644 --- a/orcid-persistence/src/main/resources/db-master.xml +++ b/orcid-persistence/src/main/resources/db-master.xml @@ -374,4 +374,7 @@ <include file="/db/updates/identifier-types/update-ethos-to-be-case-sensitive.xml" /> <include 
file="/db/updates/dw_client_details_add_user_obo_enabled.xml" /> <include file="/db/updates/create_email_domain_mapping_tables.xml" /> -</databaseChangeLog> \ No newline at end of file + <include file="/db/updates/create_event_table.xml" /> + <include file="/db/updates/dw_event.xml" /> + <include file="/db/updates/create_event_indexes.xml" /> +</databaseChangeLog> diff --git a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml index 02d6c9d413a..bd23c345b21 100644 --- a/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml +++ b/orcid-persistence/src/main/resources/db/updates/create_email_domain_mapping_tables.xml @@ -20,53 +20,25 @@ <column name="category" type="VARCHAR(16)"> <constraints nullable="false" /> </column> - <column name="date_created" type="TIMESTAMP WITH TIME ZONE" /> - <column name="last_modified" type="TIMESTAMP WITH TIME ZONE" /> - </createTable> - </changeSet> - - <changeSet author="Angel Montenegro" id="CREATE-EMAIL-DOMAIN-TO-ORG-ID-TABLE"> - <preConditions onFail="MARK_RAN"> - <not> - <tableExists tableName="email_domain_to_org_id" /> - </not> - </preConditions> - - <createTable tableName="email_domain_to_org_id"> - <column name="id" type="bigint"> - <constraints nullable="false" primaryKey="true" - primaryKeyName="email_domain_to_org_id_pkey" /> - </column> - <column name="email_domian_id" type="bigint"> - <constraints nullable="false" /> - </column> - <column name="org_disambiguated_id" type="bigint"> - <constraints nullable="false" /> + <column name="ror_id" type="VARCHAR(30)"> + <constraints nullable="true" /> </column> <column name="date_created" type="TIMESTAMP WITH TIME ZONE" /> <column name="last_modified" type="TIMESTAMP WITH TIME ZONE" /> </createTable> - - <sql>ALTER TABLE email_domain_to_org_id ADD CONSTRAINT email_domain_fk FOREIGN KEY (email_domian_id) REFERENCES email_domain 
(id);</sql> - <sql>ALTER TABLE email_domain_to_org_id ADD CONSTRAINT org_disambiguated_id_fk FOREIGN KEY (org_disambiguated_id) REFERENCES org_disambiguated (id);</sql> - <sql>create index email_domain_to_org_id_domain_index on email_domain_to_org_id(email_domian_id);</sql> - <sql>create index email_domain_to_org_id_org_index on email_domain_to_org_id(org_disambiguated_id);</sql> </changeSet> - + <changeSet id="CREATE-SEQUENCES" author="Angel Montenegro" dbms="postgresql"> <preConditions onFail="MARK_RAN"> <not> <sequenceExists sequenceName="email_domain_seq"/> - <sequenceExists sequenceName="email_domain_to_org_id_seq"/> </not> </preConditions> <createSequence sequenceName="email_domain_seq" startValue="1000" /> - <createSequence sequenceName="email_domain_to_org_id_seq" startValue="1000" /> </changeSet> <changeSet id="CREATE-AUTOCOLS" author="Angel Montenegro" dbms="hsqldb"> <addAutoIncrement tableName="email_domain" columnName="id" columnDataType="bigint"/> - <addAutoIncrement tableName="email_domain_to_org_id" columnName="id" columnDataType="bigint"/> </changeSet> <changeSet id="EMAIL-DOMAIN-INDEX" author="Angel Montenegro" dbms="postgresql"> @@ -76,11 +48,11 @@ </not> </preConditions> <sql>create index email_domain_domain_index on email_domain(email_domain);</sql> + <sql>create index email_domain_ror_id_index on email_domain(ror_id);</sql> </changeSet> <changeSet id="GRANT-READ-PERMISSIONS-TO-ORCIDRO" author="Angel Montenegro" dbms="postgresql"> - <sql>GRANT SELECT ON email_domain to orcidro;</sql> - <sql>GRANT SELECT ON email_domain_to_org_id to orcidro;</sql> + <sql>GRANT SELECT ON email_domain to orcidro;</sql> </changeSet> </databaseChangeLog> \ No newline at end of file diff --git a/orcid-persistence/src/main/resources/db/updates/create_event_indexes.xml b/orcid-persistence/src/main/resources/db/updates/create_event_indexes.xml new file mode 100644 index 00000000000..619501a3428 --- /dev/null +++ 
b/orcid-persistence/src/main/resources/db/updates/create_event_indexes.xml @@ -0,0 +1,31 @@ +<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-2.0.xsd"> + + <changeSet id="EVENT-ORCID-INDEX" author="Daniel Palafox"> + <preConditions onFail="MARK_RAN"> + <not> + <indexExists indexName="event_orcid_index" tableName="event"/> + </not> + </preConditions> + <sql>create index event_orcid_index on event(orcid);</sql> + </changeSet> + + <changeSet id="EVENT-CLIENT-ID-INDEX" author="Daniel Palafox"> + <preConditions onFail="MARK_RAN"> + <not> + <indexExists indexName="event_client_id_index" tableName="event"/> + </not> + </preConditions> + <sql>create index event_client_id_index on event(client_id);</sql> + </changeSet> + + <changeSet id="EVENT-TYPE-INDEX" author="Daniel Palafox"> + <preConditions onFail="MARK_RAN"> + <not> + <indexExists indexName="event_type_index" tableName="event"/> + </not> + </preConditions> + <sql>create index event_type_index on event(event_type);</sql> + </changeSet> +</databaseChangeLog> diff --git a/orcid-persistence/src/main/resources/db/updates/create_event_table.xml b/orcid-persistence/src/main/resources/db/updates/create_event_table.xml new file mode 100644 index 00000000000..a350620aeeb --- /dev/null +++ b/orcid-persistence/src/main/resources/db/updates/create_event_table.xml @@ -0,0 +1,45 @@ +<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-2.0.xsd"> + + <changeSet id="CREATE-EVENT-TABLE" author="Daniel Palafox"> + <createTable tableName="event"> + <column name="id" type="bigint"> + <constraints nullable="false" primaryKey="true" + 
primaryKeyName="event_pkey"/> + </column> + <column name="orcid" type="VARCHAR(19)"/> + <column name="event_type" type="VARCHAR(20)"/> + <column name="client_id" type="VARCHAR(255)"/> + <column name="redirect_url" type="VARCHAR(255)"/> + <column name="label" type="VARCHAR(255)"/> + <column name="public_page" type="VARCHAR(19)"/> + <column name="date_created" type="TIMESTAMP WITH TIME ZONE"/> + <column name="last_modified" type="TIMESTAMP WITH TIME ZONE"/> + </createTable> + + <addForeignKeyConstraint constraintName="event_orcid_fk" baseTableName="event" + baseColumnNames="orcid" referencedTableName="profile" referencedColumnNames="orcid"/> + </changeSet> + + <changeSet id="CREATE-EVENT-SEQUENCES" author="Daniel Palafox"> + <preConditions onFail="MARK_RAN"> + <not> + <sequenceExists sequenceName="event_seq"/> + </not> + </preConditions> + <createSequence sequenceName="event_seq"/> + </changeSet> + + <changeSet id="CREATE-EVENT-AUTOCOLS" author="Daniel Palafox" dbms="hsqldb"> + <addAutoIncrement tableName="event" columnName="id" columnDataType="bigint"/> + </changeSet> + + <changeSet id="GRANT-READ-PERMISSIONS-TO-ORCIDRO-ON-EVENT" author="Daniel Palafox" dbms="postgresql"> + <preConditions> + <sqlCheck expectedResult="1">SELECT 1 FROM pg_roles WHERE rolname='orcidro'</sqlCheck> + </preConditions> + <sql>GRANT SELECT ON event to orcidro;</sql> + </changeSet> + +</databaseChangeLog> diff --git a/orcid-persistence/src/main/resources/db/updates/dw_event.xml b/orcid-persistence/src/main/resources/db/updates/dw_event.xml new file mode 100644 index 00000000000..91b80aad0b4 --- /dev/null +++ b/orcid-persistence/src/main/resources/db/updates/dw_event.xml @@ -0,0 +1,45 @@ +<databaseChangeLog xmlns="http://www.liquibase.org/xml/ns/dbchangelog" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-2.0.xsd"> + + <changeSet 
id="CREATE-DW-EVENT-VIEW-GROUP-BY-DAY-CLIENT_ID-AND-EVENT_TYPE" author="Daniel Palafox" dbms="postgresql"> + <preConditions onFail="MARK_RAN"> + <not><viewExists viewName="dw_event"/></not> + </preConditions> + <createView viewName="dw_event"> + SELECT event_type, client_id, COUNT(id), DATE_TRUNC('day', date_created) + FROM event + WHERE event_type != 'Public-Page' + GROUP BY event_type, client_id, DATE_TRUNC('day', date_created) + ORDER BY DATE_TRUNC('day', date_created) DESC; + </createView> + </changeSet> + + <changeSet id="GRANT-READ-TO-DW_USER-TO-DW_EVENT-VIEW" author="Daniel Palafox" dbms="postgresql"> + <preConditions> + <sqlCheck expectedResult="1">SELECT 1 FROM pg_roles WHERE rolname='dw_user'</sqlCheck> + </preConditions> + <sql>GRANT SELECT ON TABLE dw_event to dw_user;</sql> + </changeSet> + + <changeSet id="CREATE-DW-EVENT-VIEW-GROUP-BY-PUBLIC-PAGE" author="Daniel Palafox" dbms="postgresql"> + <preConditions onFail="MARK_RAN"> + <not><viewExists viewName="dw_event_public_page"/></not> + </preConditions> + <createView viewName="dw_event_public_page"> + SELECT event_type, public_page, COUNT(id), DATE_TRUNC('day', date_created) + FROM event + WHERE event_type = 'Public-Page' + GROUP BY event_type, public_page, DATE_TRUNC('day', date_created) + ORDER BY DATE_TRUNC('day', date_created) DESC; + </createView> + </changeSet> + + <changeSet id="GRANT-READ-TO-DW-EVENT-VIEW-GROUP-BY-PUBLIC-PAGE" author="Daniel Palafox" dbms="postgresql"> + <preConditions> + <sqlCheck expectedResult="1">SELECT 1 FROM pg_roles WHERE rolname='dw_user'</sqlCheck> + </preConditions> + <sql>GRANT SELECT ON TABLE dw_event_public_page to dw_user;</sql> + </changeSet> + +</databaseChangeLog> diff --git a/orcid-persistence/src/main/resources/orcid-persistence-context.xml b/orcid-persistence/src/main/resources/orcid-persistence-context.xml index d55dba5c1c4..b1de603e0f7 100644 --- a/orcid-persistence/src/main/resources/orcid-persistence-context.xml +++ 
b/orcid-persistence/src/main/resources/orcid-persistence-context.xml @@ -449,21 +449,25 @@ </bean> <util:properties id="notification_queries" location="classpath:queries/notifications.xml" /> - <bean id="researchResourceDao" class="org.orcid.persistence.dao.impl.ResearchResourceDaoImpl"/> - <bean id="findMyStuffHistoryDao" class="org.orcid.persistence.dao.impl.FindMyStuffHistoryDaoImpl"/> + <bean id="researchResourceDao" class="org.orcid.persistence.dao.impl.ResearchResourceDaoImpl"/> + <bean id="findMyStuffHistoryDao" class="org.orcid.persistence.dao.impl.FindMyStuffHistoryDaoImpl"/> - <bean id="spamDao" class="org.orcid.persistence.dao.impl.SpamDaoImpl" /> + <bean id="spamDao" class="org.orcid.persistence.dao.impl.SpamDaoImpl" /> <bean id="spamDaoReadOnly" class="org.orcid.persistence.dao.impl.SpamDaoImpl"> <property name="entityManager" ref="entityManagerReadOnly" /> </bean> - + + <bean id="eventDao" class="org.orcid.persistence.dao.impl.EventDaoImpl" /> + + <bean id="emailDomainDao" class="org.orcid.persistence.dao.impl.EmailDomainDaoImpl" /> <bean id="emailDomainDaoReadOnly" class="org.orcid.persistence.dao.impl.EmailDomainDaoImpl"> <property name="entityManager" ref="entityManagerReadOnly" /> </bean> + <!-- Statistics --> <bean id="statisticsDao" class="org.orcid.persistence.dao.impl.StatisticsDaoImpl"> <property name="entityManager" ref="entityManager" /> diff --git a/orcid-persistence/src/test/java/org/orcid/persistence/EventDaoTest.java b/orcid-persistence/src/test/java/org/orcid/persistence/EventDaoTest.java new file mode 100644 index 00000000000..84ff1491e49 --- /dev/null +++ b/orcid-persistence/src/test/java/org/orcid/persistence/EventDaoTest.java @@ -0,0 +1,77 @@ +package org.orcid.persistence; + +import org.apache.commons.lang3.reflect.FieldUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.orcid.persistence.dao.EventDao; +import org.orcid.persistence.dao.SpamDao; 
+import org.orcid.persistence.jpa.entities.EventEntity; +import org.orcid.persistence.jpa.entities.SourceType; +import org.orcid.persistence.jpa.entities.SpamEntity; +import org.orcid.test.DBUnitTest; +import org.orcid.test.OrcidJUnit4ClassRunner; +import org.springframework.test.context.ContextConfiguration; + +import javax.annotation.Resource; +import javax.persistence.NoResultException; +import javax.transaction.Transactional; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +@RunWith(OrcidJUnit4ClassRunner.class) +@ContextConfiguration(inheritInitializers = false, inheritLocations = false, locations = {"classpath:test-orcid-persistence-context.xml"}) +public class EventDaoTest extends DBUnitTest { + + private static String USER_ORCID = "4444-4444-4444-4497"; + private static String OTHER_USER_ORCID = "4444-4444-4444-4499"; + + @Resource(name = "eventDao") + private EventDao eventDao; + + @BeforeClass + public static void initDBUnitData() throws Exception { + initDBUnitData(Arrays.asList("/data/SourceClientDetailsEntityData.xml", "/data/ProfileEntityData.xml", "/data/EventEntityData.xml")); + } + + @AfterClass + public static void removeDBUnitData() throws Exception { + removeDBUnitData(Arrays.asList("/data/EventEntityData.xml", "/data/ProfileEntityData.xml", "/data/SourceClientDetailsEntityData.xml")); + } + + @Test + @Transactional + public void testFindByOrcid() { + List<EventEntity> eventEntityList = eventDao.getEvents(OTHER_USER_ORCID); + assertNotNull(eventEntityList); + assertEquals(OTHER_USER_ORCID, eventEntityList.get(0).getOrcid()); + } + + @Test + public void testWriteSpam() throws IllegalAccessException { + EventEntity eventEntity = new EventEntity(); + eventEntity.setEventType("Sign-In"); + Date date = new Date(); + FieldUtils.writeField(eventEntity, "dateCreated", date, true); + FieldUtils.writeField(eventEntity, "lastModified", 
date, true); + eventEntity.setOrcid(USER_ORCID); + + eventDao.createEvent(eventEntity); + + List<EventEntity> eventEntities = eventDao.getEvents(USER_ORCID); + assertNotNull(eventEntities); + assertEquals(USER_ORCID, eventEntities.get(0).getOrcid()); + } + + @Test + public void testRemoveSpam() throws NoResultException { + List<EventEntity> eventEntities = eventDao.getEvents(USER_ORCID); + assertNotNull(eventEntities); + eventDao.removeEvents(eventEntities.get(0).getOrcid()); + } +} diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java index 30977c1abfe..f7517c6a436 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainLoader.java @@ -97,9 +97,11 @@ private void process() { total += 1; } } - LOG.warn("List of invalid domains:"); - for(String invalidDomain : invalidDomains) { - LOG.warn(invalidDomain); + if(!invalidDomains.isEmpty()) { + LOG.warn("List of invalid domains:"); + for(String invalidDomain : invalidDomains) { + LOG.info(invalidDomain); + } } LOG.info("Process done, total: {}, new entities: {}, updated entities: {}", total, newEntities, updatedEntities); } diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java new file mode 100644 index 00000000000..8037f5d62d0 --- /dev/null +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/cli/EmailDomainToRorLoader.java @@ -0,0 +1,186 @@ +package org.orcid.scheduler.loader.cli; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import 
java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.orcid.core.common.manager.EmailDomainManager; +import org.orcid.core.common.manager.impl.EmailDomainManagerImpl.STATUS; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.context.ApplicationContext; +import org.springframework.context.support.ClassPathXmlApplicationContext; + +import com.fasterxml.jackson.databind.MappingIterator; +import com.fasterxml.jackson.dataformat.csv.CsvMapper; +import com.fasterxml.jackson.dataformat.csv.CsvParser; + +public class EmailDomainToRorLoader { + + private static final Logger LOG = LoggerFactory.getLogger(EmailDomainToRorLoader.class); + + private String filePath; + private EmailDomainManager emailDomainManager; + private List<List<String>> csvData; + private Set<String> invalidDomains = new HashSet<String>(); + private Map<String, DomainToRorMap> map = new HashMap<String, DomainToRorMap>(); + + private int updatedEntries = 0; + private int createdEntries = 0; + + public EmailDomainToRorLoader(String filePath) { + this.filePath = filePath; + init(filePath); + } + + public void execute() throws IOException { + load(this.filePath); + processCsvData(); + storeDomainToRorMap(); + } + + private void init(String filePath) { + Path path = Paths.get(filePath); + if(!Files.exists(path)) { + LOG.error("File does not exists: '{}'", filePath); + System.exit(1); + } + + @SuppressWarnings("resource") + ApplicationContext context = new ClassPathXmlApplicationContext("orcid-core-context.xml"); + emailDomainManager = (EmailDomainManager) context.getBean("emailDomainManager"); + } + + /** + * Parses the CSV file into csvData, keeping only rows with at least two columns (domain and ROR id). + */ + private void load(String filePath) throws IOException { + LOG.info("Reading file {}", filePath); + CsvMapper csvMapper = new CsvMapper(); + csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY); + csvMapper.enable(CsvParser.Feature.TRIM_SPACES); + csvData = new ArrayList<List<String>>(); + /* try-with-resources closes the reader and the iterator even when parsing fails midway */ + try (FileReader fileReader = new FileReader(filePath); + MappingIterator<List<String>> it
= csvMapper.readerForListOf(String.class).readValues(fileReader)) { + while(it.hasNext()) { + List<String> r = it.next(); + // Hack to avoid adding empty lines if they are present, we need at least 2 columns, the domain and the ror id + if(r.size() > 1) + csvData.add(r); + } + } + } + + private void processCsvData() { + for (List<String> row : csvData) { + String domain = row.get(0); + String rorId = row.get(1); + boolean hasParent = false; + try { + String hasParentField = row.get(2); + hasParent = hasParentField == null ? false : Boolean.valueOf(hasParentField); + } catch(IndexOutOfBoundsException eoob) { + // Leave the hasParent as false + } + + if(!map.containsKey(domain)) { + DomainToRorMap dtrm = new DomainToRorMap(); + dtrm.setDomain(domain); + if(hasParent) { + dtrm.addIdWithParent(rorId); + } else { + dtrm.addIdWithNoParent(rorId); + } + map.put(domain, dtrm); + } else { + DomainToRorMap dtrm = map.get(domain); + if(hasParent) { + dtrm.addIdWithParent(rorId); + } else { + dtrm.addIdWithNoParent(rorId); + } + } + } + } + + private void storeDomainToRorMap() { + for(DomainToRorMap element : map.values()) { + LOG.debug("Processing domain {}", element.getDomain()); + // If the domain has only one entry with no parent, store that one + if(element.getIdsWithNoParent().size() == 1) { + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithNoParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } + } else if(element.getIdsWithParent().size() == 1) { + // Else, if the domain has only one entry with parent, store that one + STATUS s = emailDomainManager.createOrUpdateEmailDomain(element.getDomain(), element.getIdsWithParent().get(0)); + if(STATUS.CREATED.equals(s)) { + createdEntries++; + } else if (STATUS.UPDATED.equals(s)) { + updatedEntries++; + } + } else { +
// Else log a warning because there is no way to provide a suggestion + invalidDomains.add(element.getDomain()); + } + } + + if(!invalidDomains.isEmpty()) { + LOG.warn("The following domains couldn't be mapped ({} In total):", invalidDomains.size()); + for(String invalidDomain : invalidDomains) { + LOG.warn("{}", invalidDomain); + } + } + LOG.info("Created entries: {}, updated entries: {}", createdEntries, updatedEntries); + } + + private class DomainToRorMap { + private String domain; + private List<String> idsWithParent = new ArrayList<String>(); + private List<String> idsWithNoParent = new ArrayList<String>(); + + public void setDomain(String domain) { + this.domain = domain; + } + + public String getDomain() { + return this.domain; + } + + public void addIdWithParent(String rorId) { + LOG.debug("Domain {} adding {} with parent flag", this.domain, rorId); + idsWithParent.add(rorId); + } + + public List<String> getIdsWithParent() { + return this.idsWithParent; + } + + public void addIdWithNoParent(String rorId) { + LOG.debug("Domain {} adding {} with NO parent flag", this.domain, rorId); + idsWithNoParent.add(rorId); + } + + public List<String> getIdsWithNoParent() { + return this.idsWithNoParent; + } + } + + public static void main(String[] args) throws IOException { + String filePath = args[0]; + EmailDomainToRorLoader edl = new EmailDomainToRorLoader(filePath); + edl.execute(); + } +} diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/io/OrgDataClient.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/io/OrgDataClient.java index 0355cb11b20..8b1a791a14b 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/io/OrgDataClient.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/io/OrgDataClient.java @@ -32,9 +32,9 @@ public class OrgDataClient { */ public <T> T get(String url, String userAgent, Class<T> type) { JerseyClientResponse<T, String> response = 
jerseyClientHelperForOrgLoaders.executeGetRequest(url, null, null, false, Map.of(), Map.of("User-Agent", userAgent), type, String.class); - int status = response.getStatus(); + int status = response.getStatus(); if (status != 200) { - LOGGER.warn("Unable to fetch file {}: {}", new Object[] { url, status }); + LOGGER.error("Unable to fetch file {}: {}", new Object[] { url, status }); return null; } return response.getEntity(); diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java index 37a9840c5f1..a42ddc906c5 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/ror/RorOrgLoadSource.java @@ -79,7 +79,7 @@ public class RorOrgLoadSource implements OrgLoadSource { @Resource private OrgDisambiguatedExternalIdentifierDao orgDisambiguatedExternalIdentifierDao; - @Value("${org.orcid.core.orgs.ror.zenodoRecordsUrl:https://zenodo.org/api/records/?communities=ror-data}") + @Value("${org.orcid.core.orgs.ror.zenodoRecordsUrl:https://zenodo.org/api/records?communities=ror-data}") private String rorZenodoRecordsUrl; @Resource diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecords.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecords.java index f3227f51c6c..9056f740542 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecords.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecords.java @@ -6,7 +6,7 @@ @JsonInclude(JsonInclude.Include.NON_NULL) -@JsonIgnoreProperties(value = { "aggregations", "links" }) +@JsonIgnoreProperties(ignoreUnknown = true) public class ZenodoRecords { @JsonProperty("hits") diff --git 
a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFile.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFile.java index 130fedfb488..3b38579f44e 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFile.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFile.java @@ -1,7 +1,9 @@ package org.orcid.scheduler.loader.source.zenodo.api; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +@JsonIgnoreProperties(ignoreUnknown = true) public class ZenodoRecordsFile { @JsonProperty("bucket") diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFileLinks.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFileLinks.java index 50e505ed29a..c101e896b74 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFileLinks.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsFileLinks.java @@ -1,7 +1,9 @@ package org.orcid.scheduler.loader.source.zenodo.api; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +@JsonIgnoreProperties(ignoreUnknown = true) public class ZenodoRecordsFileLinks { @JsonProperty("self") diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHit.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHit.java index 31a0b352a7e..6c8caaafb7d 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHit.java +++ 
b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHit.java @@ -7,7 +7,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; @JsonInclude(JsonInclude.Include.NON_NULL) -@JsonIgnoreProperties(value = { "metadata", "owners", "stats", "revision" }) +@JsonIgnoreProperties(ignoreUnknown = true) public class ZenodoRecordsHit { @JsonProperty("conceptdoi") diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHitLinks.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHitLinks.java index 57ba1b1e46f..f33ec19d1c9 100644 --- a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHitLinks.java +++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/zenodo/api/ZenodoRecordsHitLinks.java @@ -1,7 +1,9 @@ package org.orcid.scheduler.loader.source.zenodo.api; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +@JsonIgnoreProperties(ignoreUnknown = true) public class ZenodoRecordsHitLinks { @JsonProperty("badge") diff --git a/orcid-test/src/main/java/org/orcid/test/DBUnitTest.java b/orcid-test/src/main/java/org/orcid/test/DBUnitTest.java index bbb0a72ddec..470ba8ab1ae 100644 --- a/orcid-test/src/main/java/org/orcid/test/DBUnitTest.java +++ b/orcid-test/src/main/java/org/orcid/test/DBUnitTest.java @@ -43,7 +43,7 @@ public class DBUnitTest { "org_disambiguated", "org_disambiguated_external_identifier", "org", "org_affiliation_relation", "profile_funding", "funding_external_identifier", "address", "institution", "affiliation", "notification", "client_details", "client_secret", "oauth2_token_detail", "custom_email", "webhook", "granted_authority", "orcid_props", "peer_review", "peer_review_subject", "shibboleth_account", "group_id_record", "invalid_record_data_changes", - "research_resource","research_resource_item, 
spam", "backup_code", "profile_history_event"}; + "research_resource","research_resource_item, spam", "backup_code", "profile_history_event", "event"}; private static ApplicationContext context; @@ -156,6 +156,7 @@ private static void cleanClientSourcedProfiles(IDatabaseConnection connection) t dataSet.addTable("research_resource"); dataSet.addTable("find_my_stuff_history"); dataSet.addTable("spam"); + dataSet.addTable("event"); DatabaseOperation.DELETE.execute(connection, dataSet); QueryDataSet theRest = new QueryDataSet(connection); @@ -190,4 +191,4 @@ public static IDataSet getDataSet(String flatXMLDataFile) { return ds; } -} \ No newline at end of file +} diff --git a/orcid-test/src/main/resources/data/EventEntityData.xml b/orcid-test/src/main/resources/data/EventEntityData.xml new file mode 100644 index 00000000000..c5fa142a18e --- /dev/null +++ b/orcid-test/src/main/resources/data/EventEntityData.xml @@ -0,0 +1,27 @@ +<?xml version='1.0' encoding='UTF-8'?> +<dataset> + <event id="1" + orcid="4444-4444-4444-4499" + event_type="Sign-In" + label="Website" + date_created="2023-01-01 15:31:00.00" + last_modified="2023-07-02 15:31:00.00" + /> + + <event id="2" + orcid="0000-0000-0000-0003" + event_type="Sign-In" + label="Website" + date_created="2023-01-01 15:31:00.00" + last_modified="2023-07-02 15:31:00.00" + /> + + <event id="3" + orcid="0000-0000-0000-0004" + event_type="Sign-In" + label="Website" + date_created="2023-01-01 15:31:00.00" + last_modified="2023-07-02 15:31:00.00" + /> + +</dataset> diff --git a/orcid-web/src/main/java/org/orcid/frontend/oauth2/OauthController.java b/orcid-web/src/main/java/org/orcid/frontend/oauth2/OauthController.java index f5278118110..0b24e1ec2f5 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/oauth2/OauthController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/oauth2/OauthController.java @@ -12,6 +12,7 @@ import javax.annotation.Resource; import javax.servlet.http.HttpServletRequest; +import 
org.orcid.core.common.manager.EventManager; import org.orcid.core.constants.OrcidOauth2Constants; import org.orcid.core.exception.ClientDeactivatedException; import org.orcid.core.exception.LockedException; @@ -21,6 +22,8 @@ import org.orcid.core.oauth.OrcidRandomValueTokenServices; import org.orcid.core.oauth.service.OrcidAuthorizationEndpoint; import org.orcid.core.oauth.service.OrcidOAuth2RequestValidator; +import org.orcid.core.togglz.Features; +import org.orcid.core.utils.EventType; import org.orcid.frontend.web.controllers.BaseControllerUtil; import org.orcid.frontend.web.controllers.helper.OauthHelper; import org.orcid.frontend.web.exception.OauthInvalidRequestException; @@ -75,6 +78,9 @@ public class OauthController { @Resource(name = "profileEntityManagerV3") private ProfileEntityManager profileEntityManager; + @Resource + private EventManager eventManager; + @RequestMapping(value = { "/oauth/custom/init.json" }, method = RequestMethod.POST) public @ResponseBody RequestInfoForm loginGetHandler(HttpServletRequest request, Map<String, Object> model, @RequestParam Map<String, String> requestParameters, SessionStatus sessionStatus, Principal principal) throws UnsupportedEncodingException { @@ -90,6 +96,9 @@ public class OauthController { List<String> responseParam = parameters.get(requestInfoForm.getResponseType()); if (responseParam != null && !responseParam.isEmpty() && !PojoUtil.isEmpty(responseParam.get(0))) { isResponseSet = true; + if (Features.EVENTS.isActive()) { + eventManager.createEvent(requestInfoForm.getUserOrcid(), EventType.REAUTHORIZE, null, requestInfoForm); + } } } diff --git a/orcid-web/src/main/java/org/orcid/frontend/spring/AjaxAuthenticationSuccessHandler.java b/orcid-web/src/main/java/org/orcid/frontend/spring/AjaxAuthenticationSuccessHandler.java index 5d3fc8bca2c..bd660794e1c 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/spring/AjaxAuthenticationSuccessHandler.java +++ 
b/orcid-web/src/main/java/org/orcid/frontend/spring/AjaxAuthenticationSuccessHandler.java @@ -6,6 +6,10 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.orcid.core.common.manager.EventManager; +import org.orcid.core.togglz.Features; +import org.orcid.core.utils.EventType; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.core.Authentication; /* @@ -15,9 +19,15 @@ * @author Robert Peters (rcpeters) */ public class AjaxAuthenticationSuccessHandler extends AjaxAuthenticationSuccessHandlerBase { + + @Autowired + EventManager eventManager; public void onAuthenticationSuccess(HttpServletRequest request, HttpServletResponse response, Authentication authentication) throws IOException, ServletException { String targetUrl = getTargetUrl(request, response, authentication); + if (Features.EVENTS.isActive()) { + eventManager.createEvent(authentication.getPrincipal().toString(), EventType.SIGN_IN, request, null); + } response.setContentType("application/json"); response.getWriter().println("{\"success\": true, \"url\": \"" + targetUrl.replaceAll("^/", "") + "\"}"); } diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java index e42dd146931..b4626ba8551 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/EmailDomainController.java @@ -4,8 +4,10 @@ import javax.ws.rs.core.MediaType; import org.orcid.core.common.manager.EmailDomainManager; +import org.orcid.core.solr.OrcidSolrOrgsClient; import org.orcid.core.utils.OrcidStringUtils; import org.orcid.persistence.jpa.entities.EmailDomainEntity; +import org.orcid.utils.solr.entities.OrgDisambiguatedSolrDocument; import org.springframework.stereotype.Controller; import 
org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; @@ -22,12 +24,15 @@ public class EmailDomainController { @Resource private EmailDomainManager emailDomainManager; + @Resource + private OrcidSolrOrgsClient orcidSolrOrgsClient; + @RequestMapping(value = "/find-category", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON) public @ResponseBody ObjectNode findCategory(@RequestParam("domain") String domain) { ObjectMapper mapper = new ObjectMapper(); if(domain == null || domain.isBlank() || domain.length() > 254) { ObjectNode response = mapper.createObjectNode(); - response.put("error", "domain lenght too long or invalid"); + response.put("error", "Domain length too short, empty or invalid"); return response; } domain = OrcidStringUtils.stripHtml(domain); @@ -42,4 +47,32 @@ public class EmailDomainController { return response; } } + + @RequestMapping(value = "/find-org", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON) + public @ResponseBody ObjectNode findOrgInfo(@RequestParam("domain") String domain) { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode response = mapper.createObjectNode(); + if(domain == null || domain.isBlank() || domain.length() > 254) { + response.put("error", "Domain length too short, empty or invalid"); + return response; + } + domain = OrcidStringUtils.stripHtml(domain); + + EmailDomainEntity ede = emailDomainManager.findByEmailDoman(domain); + if(ede != null) { + String rorId = ede.getRorId(); + if(rorId != null && !rorId.isBlank()) { + OrgDisambiguatedSolrDocument orgInfo = orcidSolrOrgsClient.getOrgByRorId(rorId); + if(orgInfo != null) { + // Pick the first result + response.put("Domain", domain); + response.put("ROR", rorId); + response.put("Org Name", orgInfo.getOrgDisambiguatedName()); + response.put("Country", orgInfo.getOrgDisambiguatedCountry()); + response.put("City", orgInfo.getOrgDisambiguatedCity()); + } + } + } + return 
response; + } } diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/FundingsController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/FundingsController.java index 711f65e75eb..4646516bc42 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/FundingsController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/FundingsController.java @@ -6,11 +6,14 @@ import java.text.NumberFormat; import java.text.ParsePosition; import java.util.ArrayList; +import java.util.Collections; import java.util.Currency; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Resource; import javax.servlet.http.HttpServletRequest; @@ -24,7 +27,9 @@ import org.orcid.core.manager.v3.ProfileFundingManager; import org.orcid.core.security.visibility.OrcidVisibilityDefaults; import org.orcid.core.utils.v3.ContributorUtils; +import org.orcid.core.utils.v3.SourceUtils; import org.orcid.core.utils.v3.activities.FundingComparators; +import org.orcid.frontend.web.pagination.WorksPaginator; import org.orcid.frontend.web.util.LanguagesMap; import org.orcid.jaxb.model.common.FundingType; import org.orcid.jaxb.model.common.Relationship; @@ -196,7 +201,12 @@ FundingForm getFunding() { fundingGroups.add(fundingGroup); } - fundingGroups.sort(new FundingComparators().getInstance(sort, sortAsc, getEffectiveUserOrcid())); + if ("source".equals(sort)) { + fundingGroups = new FundingComparators().sortBySource(fundingGroups, sortAsc, getEffectiveUserOrcid()); + } else { + fundingGroups.sort(new FundingComparators().getInstance(sort, sortAsc, getEffectiveUserOrcid())); + } + return fundingGroups; } diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/OauthAuthorizeController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/OauthAuthorizeController.java index 
7cef06b82a6..45821b78f64 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/OauthAuthorizeController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/OauthAuthorizeController.java @@ -11,8 +11,11 @@ import org.orcid.core.constants.OrcidOauth2Constants; import org.orcid.core.exception.ClientDeactivatedException; import org.orcid.core.exception.LockedException; +import org.orcid.core.common.manager.EventManager; import org.orcid.core.manager.v3.ProfileEntityManager; import org.orcid.core.oauth.OrcidRandomValueTokenServices; +import org.orcid.core.togglz.Features; +import org.orcid.core.utils.EventType; import org.orcid.frontend.web.controllers.helper.OauthHelper; import org.orcid.jaxb.model.message.ScopePathType; import org.orcid.persistence.jpa.entities.ClientDetailsEntity; @@ -52,6 +55,9 @@ public class OauthAuthorizeController extends OauthControllerBase { @Resource private OauthHelper oauthHelper; + + @Resource + private EventManager eventManager; /** This is called if user is already logged in. * Checks permissions have been granted to client and generates access code. @@ -244,6 +250,10 @@ public ModelAndView loginGetHandler(HttpServletRequest request, HttpServletRespo // Approve RedirectView view = (RedirectView) authorizationEndpoint.approveOrDeny(approvalParams, model, status, auth); requestInfoForm.setRedirectUrl(view.getUrl()); + if (Features.EVENTS.isActive()) { + EventType eventType = "true".equals(approvalParams.get("user_oauth_approval")) ? 
EventType.AUTHORIZE : EventType.AUTHORIZE_DENY; + eventManager.createEvent(auth.getPrincipal().toString(), eventType, null, requestInfoForm); + } if(new HttpSessionRequestCache().getRequest(request, response) != null) new HttpSessionRequestCache().removeRequest(request, response); LOGGER.info("OauthConfirmAccessController form.getRedirectUri being sent to client browser: " + requestInfoForm.getRedirectUrl()); @@ -252,7 +262,7 @@ public ModelAndView loginGetHandler(HttpServletRequest request, HttpServletRespo request.getSession().removeAttribute(OrcidOauth2Constants.OAUTH_2SCREENS); return requestInfoForm; } - + /** * Copies all request parameters into the provided params map * diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicProfileController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicProfileController.java index 53144913727..0fb40021d1f 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicProfileController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicProfileController.java @@ -84,11 +84,14 @@ import java.math.BigDecimal; import java.text.NumberFormat; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; @Controller public class PublicProfileController extends BaseWorkspaceController { @@ -428,7 +431,11 @@ private boolean isRecordReadyForIndexing(ProfileEntity profile) { fundingGroups.add(fundingGroup); } - fundingGroups.sort(new FundingComparators().getInstance(sort, sortAsc, orcid)); + if ("source".equals(sort)) { + fundingGroups = new FundingComparators().sortBySource(fundingGroups, sortAsc, orcid); + } else { + fundingGroups.sort(new FundingComparators().getInstance(sort, sortAsc, orcid)); + } return fundingGroups; } diff --git 
a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicRecordController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicRecordController.java index f0b9d6e2ff5..63f56614557 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicRecordController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/PublicRecordController.java @@ -1,5 +1,6 @@ package org.orcid.frontend.web.controllers; +import org.orcid.core.common.manager.EventManager; import org.orcid.core.exception.DeactivatedException; import org.orcid.core.exception.LockedException; import org.orcid.core.exception.OrcidDeprecatedException; @@ -27,6 +28,7 @@ import org.orcid.core.manager.v3.read_only.WorkManagerReadOnly; import org.orcid.core.oauth.OrcidOauth2TokenDetailService; import org.orcid.core.togglz.Features; +import org.orcid.core.utils.EventType; import org.orcid.core.utils.v3.SourceUtils; import org.orcid.frontend.web.pagination.Page; import org.orcid.frontend.web.pagination.ResearchResourcePaginator; @@ -163,6 +165,9 @@ public class PublicRecordController extends BaseWorkspaceController { @Resource PublicProfileController publicProfileController; + @Resource + private EventManager eventManager; + @Resource private WorksCacheManager worksCacheManager; @@ -179,6 +184,9 @@ PublicRecord getPublicRecord(@PathVariable("orcid") String orcid) { boolean isDeprecated = false; try { + if (Features.EVENTS.isActive()) { + eventManager.createEvent(orcid, EventType.PUBLIC_PAGE, null, null); + } // Check if the profile is deprecated or locked orcidSecurityManager.checkProfile(orcid); } catch (LockedException | DeactivatedException e) { diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/RegistrationController.java b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/RegistrationController.java index 83bf8621655..78f299e9c1d 100644 --- 
a/orcid-web/src/main/java/org/orcid/frontend/web/controllers/RegistrationController.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/controllers/RegistrationController.java @@ -20,6 +20,7 @@ import org.orcid.core.manager.EncryptionManager; import org.orcid.core.manager.ProfileEntityCacheManager; import org.orcid.core.manager.RegistrationManager; +import org.orcid.core.common.manager.EventManager; import org.orcid.core.manager.v3.OrcidSearchManager; import org.orcid.core.manager.v3.ProfileHistoryEventManager; import org.orcid.core.manager.v3.read_only.AffiliationsManagerReadOnly; @@ -27,6 +28,8 @@ import org.orcid.core.manager.v3.read_only.RecordNameManagerReadOnly; import org.orcid.core.profile.history.ProfileHistoryEventType; import org.orcid.core.security.OrcidUserDetailsService; +import org.orcid.core.togglz.Features; +import org.orcid.core.utils.EventType; import org.orcid.core.utils.OrcidRequestUtil; import org.orcid.core.utils.OrcidStringUtils; import org.orcid.frontend.email.RecordEmailSender; @@ -130,6 +133,9 @@ public class RegistrationController extends BaseController { @Resource private SocialSignInUtils socialSignInUtils; + @Resource + private EventManager eventManager; + @RequestMapping(value = "/register.json", method = RequestMethod.GET) public @ResponseBody Registration getRegister(HttpServletRequest request, HttpServletResponse response) { // Remove the session hash if needed @@ -298,6 +304,9 @@ public void validateGrcaptcha(HttpServletRequest request, @RequestBody Registrat redirectUrl = calculateRedirectUrl(request, response, true, true); } r.setUrl(redirectUrl); + if (Features.EVENTS.isActive()) { + eventManager.createEvent(getCurrentUserOrcid(), EventType.NEW_REGISTRATION, request, null); + } return r; } @@ -572,4 +581,4 @@ private void processProfileHistoryEvents(Registration registration, String newUs } } -} \ No newline at end of file +} diff --git a/orcid-web/src/main/java/org/orcid/frontend/web/pagination/WorksPaginator.java 
b/orcid-web/src/main/java/org/orcid/frontend/web/pagination/WorksPaginator.java index ccb7796241f..9553e641f11 100644 --- a/orcid-web/src/main/java/org/orcid/frontend/web/pagination/WorksPaginator.java +++ b/orcid-web/src/main/java/org/orcid/frontend/web/pagination/WorksPaginator.java @@ -5,6 +5,8 @@ import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Resource; @@ -45,7 +47,11 @@ public Page<WorkGroup> getWorksPage(String orcid, int offset, int pageSize, bool Page<WorkGroup> worksPage = new Page<WorkGroup>(); if (works != null) { List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> filteredGroups = filter(works, justPublic); - filteredGroups = sort(filteredGroups, sort, sortAsc, orcid); + if ("source".equals(sort)) { + filteredGroups = sortBySource(filteredGroups, sortAsc, orcid); + } else { + filteredGroups = sort(filteredGroups, sort, sortAsc, orcid); + } worksPage.setTotalGroups(filteredGroups.size()); @@ -65,7 +71,11 @@ public Page<WorkGroup> getWorksExtendedPage(String orcid, int offset, int pageSi Page<WorkGroup> worksPage = new Page<WorkGroup>(); if (works != null) { List<WorkGroupExtended> filteredGroups = filterWorksExtended(works, justPublic); - filteredGroups = sortExtended(filteredGroups, sort, sortAsc, orcid); + if ("source".equals(sort)) { + filteredGroups = sortBySourceExtended(filteredGroups, sortAsc, orcid); + } else { + filteredGroups = sortExtended(filteredGroups, sort, sortAsc, orcid); + } worksPage.setTotalGroups(filteredGroups.size()); @@ -126,8 +136,6 @@ private List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> sort(List Collections.sort(list, new DateComparator()); } else if (TYPE_SORT_KEY.equals(sort)) { Collections.sort(list, new TypeComparator()); - } else if (SOURCE_SORT_KEY.equals(sort)) { - Collections.sort(list, new SourceComparator(orcid)); } if (!sortAsc) { @@ -143,8 +151,6 @@ private 
List<WorkGroupExtended> sortExtended(List<WorkGroupExtended> list, Strin Collections.sort(list, new DateComparatorWorkGroupExtended()); } else if (TYPE_SORT_KEY.equals(sort)) { Collections.sort(list, new TypeComparatorWorkGroupExtended()); - } else if (SOURCE_SORT_KEY.equals(sort)) { - Collections.sort(list, new SourceComparatorWorkGroupExtended(orcid)); } if (!sortAsc) { @@ -457,21 +463,36 @@ public int compare(WorkGroupExtended o1, WorkGroupExtended o2) { } } - private class SourceComparatorWorkGroupExtended implements Comparator<WorkGroupExtended> { - private String orcid; + public List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> sortBySource(List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> workGroups, boolean sortAsc, String orcid) { + List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> selfAsserted = workGroups.stream() + .filter(work -> SourceUtils.isSelfAsserted(work.getWorkSummary().get(0).getSource(), orcid)) + .collect(Collectors.toList()); - SourceComparatorWorkGroupExtended(String orcid) { - this.orcid = orcid; - } + List<org.orcid.jaxb.model.v3.release.record.summary.WorkGroup> validated = workGroups.stream() + .filter(work -> !SourceUtils.isSelfAsserted(work.getWorkSummary().get(0).getSource(), orcid)) + .collect(Collectors.toList()); - @Override - public int compare(WorkGroupExtended o1, WorkGroupExtended o2) { - return Boolean.compare(isSelfAsserted(o1.getWorkSummary().get(0)), isSelfAsserted(o2.getWorkSummary().get(0))); - } + selfAsserted.sort(new TitleComparator()); + validated.sort(new TitleComparator()); - private boolean isSelfAsserted(WorkSummaryExtended workSummary) { - return SourceUtils.isSelfAsserted(workSummary.getSource(), orcid); - } + return (sortAsc ? 
Stream.concat(validated.stream(), selfAsserted.stream()) : Stream.concat(selfAsserted.stream(), validated.stream())) + .collect(Collectors.toList()); + } + + public List<WorkGroupExtended> sortBySourceExtended(List<WorkGroupExtended> workGroups, boolean sortAsc, String orcid) { + List<WorkGroupExtended> selfAsserted = workGroups.stream() + .filter(work -> SourceUtils.isSelfAsserted(work.getWorkSummary().get(0).getSource(), orcid)) + .collect(Collectors.toList()); + + List<WorkGroupExtended> validated = workGroups.stream() + .filter(work -> !SourceUtils.isSelfAsserted(work.getWorkSummary().get(0).getSource(), orcid)) + .collect(Collectors.toList()); + + selfAsserted.sort(new TitleComparatorWorkGroupExtended()); + validated.sort(new TitleComparatorWorkGroupExtended()); + + return (sortAsc ? Stream.concat(validated.stream(), selfAsserted.stream()) : Stream.concat(selfAsserted.stream(), validated.stream())) + .collect(Collectors.toList()); } } diff --git a/orcid-web/src/main/resources/orcid-frontend-security.xml b/orcid-web/src/main/resources/orcid-frontend-security.xml index d264b73f05e..42e2fd62ab0 100644 --- a/orcid-web/src/main/resources/orcid-frontend-security.xml +++ b/orcid-web/src/main/resources/orcid-frontend-security.xml @@ -421,7 +421,9 @@ <sec:intercept-url pattern="/countryNamesToCountryCodes.json(\?.*)?" access="IS_AUTHENTICATED_ANONYMOUSLY" /> <sec:intercept-url pattern="/email-domain/find-category(\?.*)?" - access="IS_AUTHENTICATED_ANONYMOUSLY" /> + access="IS_AUTHENTICATED_ANONYMOUSLY" /> + <sec:intercept-url pattern="/email-domain/find-org(\?.*)?" 
+ access="IS_AUTHENTICATED_ANONYMOUSLY" /> <sec:intercept-url pattern="/developer-tools" access="ROLE_USER"/> <sec:intercept-url pattern="/developer-tools/.*" diff --git a/orcid-web/src/main/webapp/static/javascript/ng1Orcid/package-lock.json b/orcid-web/src/main/webapp/static/javascript/ng1Orcid/package-lock.json index 5db2bc4f6f4..6e4c96591db 100644 --- a/orcid-web/src/main/webapp/static/javascript/ng1Orcid/package-lock.json +++ b/orcid-web/src/main/webapp/static/javascript/ng1Orcid/package-lock.json @@ -1011,26 +1011,32 @@ } }, "browserify-sign": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/browserify-sign/-/browserify-sign-4.2.1.tgz", - "integrity": "sha512-/vrA5fguVAKKAVTNJjgSm1tRQDHUU6DbwO9IROu/0WAzC8PKhucDSh18J0RMvVeHAn5puMd+QHC2erPRNf8lmg==", + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/browserify-sign/-/browserify-sign-4.2.2.tgz", + "integrity": "sha512-1rudGyeYY42Dk6texmv7c4VcQ0EsvVbLwZkA+AQB7SxvXxmcD93jcHie8bzecJ+ChDlmAm2Qyu0+Ccg5uhZXCg==", "dev": true, "requires": { - "bn.js": "^5.1.1", - "browserify-rsa": "^4.0.1", + "bn.js": "^5.2.1", + "browserify-rsa": "^4.1.0", "create-hash": "^1.2.0", "create-hmac": "^1.1.7", - "elliptic": "^6.5.3", + "elliptic": "^6.5.4", "inherits": "^2.0.4", - "parse-asn1": "^5.1.5", - "readable-stream": "^3.6.0", - "safe-buffer": "^5.2.0" + "parse-asn1": "^5.1.6", + "readable-stream": "^3.6.2", + "safe-buffer": "^5.2.1" }, "dependencies": { + "bn.js": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.1.tgz", + "integrity": "sha512-eXRvHzWyYPBuB4NBy0cmYQjGitUrtqwbvlzP3G6VFnNRbsZQIxQ10PbKKHt8gZ/HW/D/747aDl+QkDqg3KQLMQ==", + "dev": true + }, "readable-stream": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", - "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "version": "3.6.2", + "resolved": 
"https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", "dev": true, "requires": { "inherits": "^2.0.3", diff --git a/orcid-web/src/test/java/org/orcid/frontend/web/controllers/PublicRecordControllerTest.java b/orcid-web/src/test/java/org/orcid/frontend/web/controllers/PublicRecordControllerTest.java index ea2ec422e91..f910d1a146a 100644 --- a/orcid-web/src/test/java/org/orcid/frontend/web/controllers/PublicRecordControllerTest.java +++ b/orcid-web/src/test/java/org/orcid/frontend/web/controllers/PublicRecordControllerTest.java @@ -155,8 +155,8 @@ public void testGetRecordSummary() { // Check external identifiers assertNotNull(record.getExternalIdentifiers()); assertEquals(3, record.getExternalIdentifiers().size()); - - // Added by member + + // User OBO assertEquals(String.valueOf(19), record.getExternalIdentifiers().get(0).getId()); assertEquals("self_public_user_obo_type", record.getExternalIdentifiers().get(0).getCommonName()); assertEquals("self_public_user_obo_ref", record.getExternalIdentifiers().get(0).getReference()); @@ -168,7 +168,7 @@ public void testGetRecordSummary() { assertEquals("self_public_ref", record.getExternalIdentifiers().get(1).getReference()); assertEquals("http://ext-id/self/public", record.getExternalIdentifiers().get(1).getUrl()); assertFalse(record.getExternalIdentifiers().get(1).isValidated()); - // User OBO + // Added by member assertEquals(String.valueOf(13), record.getExternalIdentifiers().get(2).getId()); assertEquals("public_type", record.getExternalIdentifiers().get(2).getCommonName()); assertEquals("public_ref", record.getExternalIdentifiers().get(2).getReference()); diff --git a/orcid-web/src/test/java/org/orcid/frontend/web/pagination/WorksPaginatorTest.java b/orcid-web/src/test/java/org/orcid/frontend/web/pagination/WorksPaginatorTest.java index 2a10808f122..b9ca7dce213 100644 --- 
a/orcid-web/src/test/java/org/orcid/frontend/web/pagination/WorksPaginatorTest.java +++ b/orcid-web/src/test/java/org/orcid/frontend/web/pagination/WorksPaginatorTest.java @@ -181,8 +181,8 @@ public void testSourceSort() { Mockito.when(worksCacheManager.getGroupedWorks(Mockito.anyString())).thenReturn(works); Page<org.orcid.pojo.grouping.WorkGroup> page = worksPaginator.getWorksPage("orcid", 0, pageSize, false, WorksPaginator.SOURCE_SORT_KEY, false); - assertEquals("APP-5555-5555-5555-5555", page.getGroups().get(0).getWorks().get(0).getSource()); - assertEquals("orcid", page.getGroups().get(49).getWorks().get(0).getSource()); + assertEquals("orcid", page.getGroups().get(0).getWorks().get(0).getSource()); + assertEquals("APP-5555-5555-5555-5555", page.getGroups().get(49).getWorks().get(0).getSource()); } @Test @@ -218,8 +218,8 @@ public void testTitleSortCase() { Mockito.when(worksCacheManager.getGroupedWorks(Mockito.anyString())).thenReturn(works); Page<org.orcid.pojo.grouping.WorkGroup> page = worksPaginator.getWorksPage("orcid", 0, pageSize, false, WorksPaginator.SOURCE_SORT_KEY, false); - assertEquals("APP-5555-5555-5555-5555", page.getGroups().get(0).getWorks().get(0).getSource()); - assertEquals("orcid", page.getGroups().get(49).getWorks().get(0).getSource()); + assertEquals("orcid", page.getGroups().get(0).getWorks().get(0).getSource()); + assertEquals("APP-5555-5555-5555-5555", page.getGroups().get(49).getWorks().get(0).getSource()); } @Test diff --git a/properties/development.properties b/properties/development.properties index bfcef35f40a..de9a4064834 100644 --- a/properties/development.properties +++ b/properties/development.properties @@ -196,7 +196,7 @@ org.orcid.core.orgs.fundref.localFilePath=/tmp/some/path # ROR org imports org.orcid.core.orgs.ror.enabled=false -org.orcid.core.orgs.ror.zenodoRecordsUrl=https://zenodo.org/api/records/?communities=ror-data +org.orcid.core.orgs.ror.zenodoRecordsUrl=https://zenodo.org/api/records?communities=ror-data 
org.orcid.core.orgs.ror.localDataPath=/tmp/ror/ror.json org.orcid.core.orgs.ror.localZipPath=/tmp/ror/ror.zip diff --git a/solr-config/cores/fundingSubType/conf/lang/contractions_ca.txt b/solr-config/cores/fundingSubType/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f913d..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/solr-config/cores/fundingSubType/conf/lang/contractions_fr.txt b/solr-config/cores/fundingSubType/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b23e..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/solr-config/cores/fundingSubType/conf/lang/contractions_ga.txt b/solr-config/cores/fundingSubType/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa349a..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git a/solr-config/cores/fundingSubType/conf/lang/contractions_it.txt b/solr-config/cores/fundingSubType/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095372..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git 
a/solr-config/cores/fundingSubType/conf/lang/hyphenations_ga.txt b/solr-config/cores/fundingSubType/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc5a3..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/solr-config/cores/fundingSubType/conf/lang/stemdict_nl.txt b/solr-config/cores/fundingSubType/conf/lang/stemdict_nl.txt deleted file mode 100644 index 441072971d3..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/solr-config/cores/fundingSubType/conf/lang/stoptags_ja.txt b/solr-config/cores/fundingSubType/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845e3..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. 
-# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 
健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. 
In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. 
ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. 
が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. 
かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. [..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. 
-非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ar.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db6a2..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_bg.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2ae38..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ca.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65deafe1..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre -érem 
-eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_cz.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097dac7..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to 
-jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile -přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_da.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b98e..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_de.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7ae08..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_el.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5bd6..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ -# 
Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_en.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2a1..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_es.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8d56..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_eu.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93460..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_fa.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6da7..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_fa.txt 
+++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت -غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من 
-ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_fi.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a055b..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those 
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_fr.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6846..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ga.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ga.txt deleted file mode 100644 index 
9ff88d747e5..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_gl.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12c14..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós diff --git 
a/solr-config/cores/fundingSubType/conf/lang/stopwords_hi.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb083b..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. -अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि -इंहिं -जिधर -इंहें 
-अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_hu.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8aa9..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. 
-ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_hy.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50fbc8..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_id.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a5c5..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena 
-karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_it.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc773ab..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that 
-quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ja.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b16..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ja.txt +++ /dev/null @@ 
-1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_lv.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c06c3..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju -varējām 
-varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_nl.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeacf6f..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. 
of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_no.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28ba54..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_pt.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01af6b..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ro.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a5ba..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_ru.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400c64..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_sv.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6766..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_th.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe692..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/solr-config/cores/fundingSubType/conf/lang/stopwords_tr.txt b/solr-config/cores/fundingSubType/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d4ea..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni 
-benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/solr-config/cores/fundingSubType/conf/lang/userdict_ja.txt b/solr-config/cores/fundingSubType/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4d81..00000000000 --- a/solr-config/cores/fundingSubType/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. 
This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same <text> is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr-config/cores/fundingSubType/conf/params.json b/solr-config/cores/fundingSubType/conf/params.json deleted file mode 100644 index 06114ef2577..00000000000 --- a/solr-config/cores/fundingSubType/conf/params.json +++ /dev/null @@ -1,20 +0,0 @@ -{"params":{ - "query":{ - "defType":"edismax", - "q.alt":"*:*", - "rows":"10", - "fl":"*,score", - "":{"v":0} - }, - "facets":{ - "facet":"on", - "facet.mincount": "1", - "":{"v":0} - }, - "velocity":{ - "wt": "velocity", - "v.template":"browse", - "v.layout": "layout", - "":{"v":0} - } -}} \ No newline at end of file diff --git a/solr-config/cores/fundingSubType/conf/protwords.txt b/solr-config/cores/fundingSubType/conf/protwords.txt deleted file mode 100644 index 1dfc0abecbf..00000000000 --- a/solr-config/cores/fundingSubType/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff --git a/solr-config/cores/fundingSubType/conf/schema.xml b/solr-config/cores/fundingSubType/conf/schema.xml deleted file mode 100644 index e19e1317a31..00000000000 --- a/solr-config/cores/fundingSubType/conf/schema.xml +++ /dev/null @@ -1,1009 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- - - This example schema is the recommended starting point for users. - It should be kept correct and concise, usable out-of-the-box. 
- - - For more information, on how to customize this file, please see - http://lucene.apache.org/solr/guide/documents-fields-and-schema-design.html - - PERFORMANCE NOTE: this schema includes many optional features and should not - be used for benchmarking. To improve performance one could - - set stored="false" for all fields possible (esp large fields) when you - only need to search on the field but don't need to return the original - value. - - set indexed="false" if you don't need to search on the field, but only - return the field as a result of searching on other indexed fields. - - remove all unneeded copyField statements - - for best index size and searching performance, set "index" to false - for all general text fields, use copyField to copy them to the - catchall "text" field, and use that for searching. ---> - -<schema name="default-config" version="1.6"> - <!-- attribute "name" is the name of this schema and is only used for display purposes. - version="x.y" is Solr's version number for the schema syntax and - semantics. It should not normally be changed by applications. - - 1.0: multiValued attribute did not exist, all fields are multiValued - by nature - 1.1: multiValued attribute introduced, false by default - 1.2: omitTermFreqAndPositions attribute introduced, true by default - except for text fields. - 1.3: removed optional field compress feature - 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser - behavior when a single string produces multiple tokens. Defaults - to off for version >= 1.4 - 1.5: omitNorms defaults to true for primitive field types - (int, float, boolean, string...) - 1.6: useDocValuesAsStored defaults to true. 
- --> - - <!-- Valid attributes for fields: - name: mandatory - the name for the field - type: mandatory - the name of a field type from the - fieldTypes section - indexed: true if this field should be indexed (searchable or sortable) - stored: true if this field should be retrievable - docValues: true if this field should have doc values. Doc Values is - recommended (required, if you are using *Point fields) for faceting, - grouping, sorting and function queries. Doc Values will make the index - faster to load, more NRT-friendly and more memory-efficient. - They are currently only supported by StrField, UUIDField, all - *PointFields, and depending on the field type, they might require - the field to be single-valued, be required or have a default value - (check the documentation of the field type you're interested in for - more information) - multiValued: true if this field may contain multiple values per document - omitNorms: (expert) set to true to omit the norms associated with - this field (this disables length normalization and index-time - boosting for the field, and saves some memory). Only full-text - fields or fields that need an index-time boost need norms. - Norms are omitted for primitive (non-analyzed) types by default. - termVectors: [false] set to true to store the term vector for a - given field. - When using MoreLikeThis, fields used for similarity should be - stored for best performance. - termPositions: Store position information with the term vector. - This will increase storage costs. - termOffsets: Store offset information with the term vector. This - will increase storage costs. - required: The field is required. It will throw an error if the - value does not exist - default: a value that should be used if no value is specified - when adding a document. - --> - - <!-- field names should consist of alphanumeric or underscore characters only and - not start with a digit. 
This is not currently strictly enforced, - but other field names will not have first class support from all components - and back compatibility is not guaranteed. Names with both leading and - trailing underscores (e.g. _version_) are reserved. - --> - - <!-- In this _default configset, only four fields are pre-declared: - id, _version_, and _text_ and _root_. All other fields will be type guessed and added via the - "add-unknown-fields-to-the-schema" update request processor chain declared in solrconfig.xml. - - Note that many dynamic fields are also defined - you can use them to specify a - field's type via field naming conventions - see below. - - WARNING: The _text_ catch-all field will significantly increase your index size. - If you don't need it, consider removing it and the corresponding copyField directive. - --> - - <!-- docValues are enabled by default for long type so we don't need to index the version field --> - <field name="_version_" type="plong" indexed="false" stored="false"/> - <field name="_root_" type="string" indexed="true" stored="false" docValues="false" /> - <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/> - - <!-- This can be enabled, in case the client does not know what fields may be searched. It isn't enabled by default - because it's very expensive to index everything twice. --> - <!-- <copyField source="*" dest="_text_"/> --> - - <!-- Dynamic field definitions allow using convention over configuration - for fields via the specification of patterns to match field names. - EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) - RESTRICTION: the glob-like pattern in the name attribute must have a "*" only at the start or the end. 
--> - - <dynamicField name="*_i" type="pint" indexed="true" stored="true"/> - <dynamicField name="*_is" type="pints" indexed="true" stored="true"/> - <dynamicField name="*_s" type="string" indexed="true" stored="true" /> - <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> - <dynamicField name="*_l" type="plong" indexed="true" stored="true"/> - <dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> - <dynamicField name="*_t" type="text_general" indexed="true" stored="true" multiValued="false"/> - <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> - <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> - <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> - <dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> - <dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> - <dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> - <dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> - <dynamicField name="random_*" type="random"/> - - <!-- Type used for data-driven schema, to add a string copy for each text field --> - <dynamicField name="*_str" type="strings" stored="false" docValues="true" indexed="false" useDocValuesAsStored="false"/> - - <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> - <dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_p" type="location" indexed="true" stored="true"/> - <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> - - <!-- payloaded dynamic fields --> - <dynamicField name="*_dpf" type="delimited_payloads_float" indexed="true" stored="true"/> - <dynamicField name="*_dpi" type="delimited_payloads_int" indexed="true" stored="true"/> - <dynamicField name="*_dps" type="delimited_payloads_string" indexed="true" stored="true"/> - - <dynamicField name="attr_*" 
type="text_general" indexed="true" stored="true" multiValued="true"/> - - <!-- Field to use to determine and enforce document uniqueness. - Unless this field is marked with required="false", it will be a required field - --> - <!-- copyField commands copy one field to another at the time a document - is added to the index. It's used either to index the same field differently, - or to add multiple fields to the same field for easier/faster searching. - - <copyField source="sourceFieldName" dest="destinationFieldName"/> - --> - - <!-- field type definitions. The "name" attribute is - just a label to be used by field definitions. The "class" - attribute and any other attributes determine the real - behavior of the fieldType. - Class names starting with "solr" refer to java classes in a - standard package such as org.apache.solr.analysis - --> - - <!-- sortMissingLast and sortMissingFirst attributes are optional attributes are - currently supported on types that are sorted internally as strings - and on numeric types. - This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble". - - If sortMissingLast="true", then a sort on this field will cause documents - without the field to come after documents with the field, - regardless of the requested sort order (asc or desc). - - If sortMissingFirst="true", then a sort on this field will cause documents - without the field to come before documents with the field, - regardless of the requested sort order. - - If sortMissingLast="false" and sortMissingFirst="false" (the default), - then default lucene sorting will be used which places docs without the - field first in an ascending sort and last in a descending sort. - --> - - <!-- The StrField type is not analyzed, but indexed/stored verbatim. 
--> - <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" /> - <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" /> - - <!-- boolean type: "true" or "false" --> - <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> - <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> - - <!-- - Numeric field types that index values using KD-trees. - Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. - --> - <fieldType name="pint" class="solr.IntPointField" docValues="true"/> - <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> - <fieldType name="plong" class="solr.LongPointField" docValues="true"/> - <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> - - <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> - <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> - <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> - <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> - <fieldType name="random" class="solr.RandomSortField" indexed="true"/> - - - <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and - is a more restricted form of the canonical representation of dateTime - http://www.w3.org/TR/xmlschema-2/#dateTime - The trailing "Z" designates UTC time and is mandatory. - Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z - All other components are mandatory. - - Expressions can also be used to denote calculations that should be - performed relative to "NOW" to determine the value, ie... - - NOW/HOUR - ... Round to the start of the current hour - NOW-1DAY - ... Exactly 1 day prior to now - NOW/DAY+6MONTHS+3DAYS - ... 
6 months and 3 days in the future from the start of - the current day - - --> - <!-- KD-tree versions of date fields --> - <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> - <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> - - <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings --> - <fieldType name="binary" class="solr.BinaryField"/> - - <!-- solr.TextField allows the specification of custom text analyzers - specified as a tokenizer and a list of token filters. Different - analyzers may be specified for indexing and querying. - - The optional positionIncrementGap puts space between multiple fields of - this type on the same document, with the purpose of preventing false phrase - matching across fields. - - For more info on customizing your analyzer chain, please see - http://lucene.apache.org/solr/guide/understanding-analyzers-tokenizers-and-filters.html#understanding-analyzers-tokenizers-and-filters - --> - - <!-- One can also specify an existing Analyzer class that has a - default constructor via the class attribute on the analyzer element. - Example: - <fieldType name="text_greek" class="solr.TextField"> - <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> - </fieldType> - --> - - <!-- A text field that only splits on whitespace for exact matching of words --> - <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> - <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - </analyzer> - </fieldType> - - <!-- A general text field that has reasonable, generic - cross-language defaults: it tokenizes with StandardTokenizer, - removes stop words from case-insensitive "stopwords.txt" - (empty by default), and down cases. At query time only, it - also applies synonyms. 
- --> - <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> - <analyzer type="index"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <!-- in this example, we will only use synonyms at query time - <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> - <filter class="solr.FlattenGraphFilterFactory"/> - --> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - </fieldType> - - - <!-- SortableTextField generaly functions exactly like TextField, - except that it supports, and by default uses, docValues for sorting (or faceting) - on the first 1024 characters of the original field values (which is configurable). - - This makes it a bit more useful then TextField in many situations, but the trade-off - is that it takes up more space on disk; which is why it's not used in place of TextField - for every fieldType in this _default schema. 
- --> - <dynamicField name="*_t_sort" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/> - <dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/> - <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> - <analyzer type="index"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - </fieldType> - - <!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, - removes English stop words (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and - finally applies Porter's stemming. The query time analyzer also applies synonyms from synonyms.txt. --> - <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> - <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> - <analyzer type="index"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- in this example, we will only use synonyms at query time - <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> - <filter class="solr.FlattenGraphFilterFactory"/> - --> - <!-- Case insensitive stop word removal. 
- --> - <filter class="solr.StopFilterFactory" - ignoreCase="true" - words="lang/stopwords_en.txt" - /> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.EnglishPossessiveFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: - <filter class="solr.EnglishMinimalStemFilterFactory"/> - --> - <filter class="solr.PorterStemFilterFactory"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.StopFilterFactory" - ignoreCase="true" - words="lang/stopwords_en.txt" - /> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.EnglishPossessiveFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: - <filter class="solr.EnglishMinimalStemFilterFactory"/> - --> - <filter class="solr.PorterStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- A text field with defaults appropriate for English, plus - aggressive word-splitting and autophrase features enabled. - This field is just like text_en, except it adds - WordDelimiterGraphFilter to enable splitting and matching of - words on case-change, alpha numeric boundaries, and - non-alphanumeric chars. This means certain compound word - cases will work, for example query "wi fi" will match - document "WiFi" or "wi-fi". 
- --> - <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> - <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> - <analyzer type="index"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <!-- in this example, we will only use synonyms at query time - <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> - --> - <!-- Case insensitive stop word removal. - --> - <filter class="solr.StopFilterFactory" - ignoreCase="true" - words="lang/stopwords_en.txt" - /> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.PorterStemFilterFactory"/> - <filter class="solr.FlattenGraphFilterFactory" /> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.StopFilterFactory" - ignoreCase="true" - words="lang/stopwords_en.txt" - /> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.PorterStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Less flexible matching, but less false matches. Probably not ideal for product names, - but may be good for SKUs. Can insert dashes in the wrong place and still match. 
--> - <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> - <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> - <analyzer type="index"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.EnglishMinimalStemFilterFactory"/> - <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes - possible with WordDelimiterGraphFilter in conjuncton with stemming. --> - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> - <filter class="solr.FlattenGraphFilterFactory" /> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.EnglishMinimalStemFilterFactory"/> - <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes - possible with WordDelimiterGraphFilter in conjuncton with stemming. 
--> - <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Just like text_general except it reverses the characters of - each token, to enable more efficient leading wildcard queries. - --> - <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> - <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> - <analyzer type="index"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" - maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - </fieldType> - - <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> - <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> - </analyzer> - </fieldType> - - <!-- lowercases the entire field value, keeping it as a single token. 
--> - <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> - <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.KeywordTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory" /> - </analyzer> - </fieldType> - - <!-- - Example of using PathHierarchyTokenizerFactory at index time, so - queries for paths match documents at that path, or in descendent paths - --> - <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> - <fieldType name="descendent_path" class="solr.TextField"> - <analyzer type="index"> - <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.KeywordTokenizerFactory" /> - </analyzer> - </fieldType> - - <!-- - Example of using PathHierarchyTokenizerFactory at query time, so - queries for paths match documents at that path, or in ancestor paths - --> - <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> - <fieldType name="ancestor_path" class="solr.TextField"> - <analyzer type="index"> - <tokenizer class="solr.KeywordTokenizerFactory" /> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> - </analyzer> - </fieldType> - - <!-- This point type indexes the coordinates as separate fields (subFields) - If subFieldType is defined, it references a type, and a dynamic field - definition is created matching *___<typename>. Alternately, if - subFieldSuffix is defined, that is used to create the subFields. - Example: if subFieldType="double", then the coordinates would be - indexed in fields myloc_0___double,myloc_1___double. - Example: if subFieldSuffix="_d" then the coordinates would be indexed - in fields myloc_0_d,myloc_1_d - The subFields are an implementation detail of the fieldType, and end - users normally should not need to know about them. 
- --> - <dynamicField name="*_point" type="point" indexed="true" stored="true"/> - <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> - - <!-- A specialized field for geospatial search filters and distance sorting. --> - <fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/> - - <!-- A geospatial field type that supports multiValued and polygon shapes. - For more information about this and other spatial fields see: - http://lucene.apache.org/solr/guide/spatial-search.html - --> - <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" - geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> - - <!-- Payloaded field types --> - <fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField"> - <analyzer> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> - </analyzer> - </fieldType> - <fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField"> - <analyzer> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/> - </analyzer> - </fieldType> - <fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField"> - <analyzer> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/> - </analyzer> - </fieldType> - - <!-- some examples for different languages (generally ordered by ISO code) --> - - <!-- Arabic --> - <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/> - <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- for any non-arabic --> - <filter class="solr.LowerCaseFilterFactory"/> - <filter 
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> - <!-- normalizes ﻯ to ﻱ, etc --> - <filter class="solr.ArabicNormalizationFilterFactory"/> - <filter class="solr.ArabicStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Bulgarian --> - <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/> - <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> - <filter class="solr.BulgarianStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Catalan --> - <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/> - <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> - <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/> - </analyzer> - </fieldType> - - <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> - <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/> - <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- normalize width before bigram, as e.g. 
half-width dakuten combine --> - <filter class="solr.CJKWidthFilterFactory"/> - <!-- for any non-CJK --> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.CJKBigramFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Czech --> - <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/> - <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> - <filter class="solr.CzechStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Danish --> - <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/> - <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> - <filter class="solr.SnowballPorterFilterFactory" language="Danish"/> - </analyzer> - </fieldType> - - <!-- German --> - <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/> - <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> - <filter class="solr.GermanNormalizationFilterFactory"/> - <filter class="solr.GermanLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> - </analyzer> - </fieldType> - - <!-- Greek --> - <dynamicField name="*_txt_el" type="text_el" indexed="true" 
stored="true"/> - <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- greek specific lowercase for sigma --> - <filter class="solr.GreekLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> - <filter class="solr.GreekStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Spanish --> - <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/> - <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> - <filter class="solr.SpanishLightStemFilterFactory"/> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> - </analyzer> - </fieldType> - - <!-- Basque --> - <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/> - <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> - <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> - </analyzer> - </fieldType> - - <!-- Persian --> - <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/> - <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <!-- for ZWNJ --> - <charFilter class="solr.PersianCharFilterFactory"/> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.ArabicNormalizationFilterFactory"/> - <filter class="solr.PersianNormalizationFilterFactory"/> - <filter 
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> - </analyzer> - </fieldType> - - <!-- Finnish --> - <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/> - <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> - <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> - <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- French --> - <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/> - <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> - <filter class="solr.FrenchLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> - </analyzer> - </fieldType> - - <!-- Irish --> - <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/> - <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes d', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> - <!-- removes n-, etc. position increments is intentionally false! 
--> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> - <filter class="solr.IrishLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> - <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> - </analyzer> - </fieldType> - - <!-- Galician --> - <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/> - <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> - <filter class="solr.GalicianStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Hindi --> - <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/> - <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <!-- normalizes unicode representation --> - <filter class="solr.IndicNormalizationFilterFactory"/> - <!-- normalizes variation in spelling --> - <filter class="solr.HindiNormalizationFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> - <filter class="solr.HindiStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Hungarian --> - <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/> - <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> - <filter 
class="solr.SnowballPorterFilterFactory" language="Hungarian"/> - <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Armenian --> - <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/> - <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> - <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> - </analyzer> - </fieldType> - - <!-- Indonesian --> - <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/> - <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> - <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> - <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> - </analyzer> - </fieldType> - - <!-- Italian --> - <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/> - <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <!-- removes l', etc --> - <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> - <filter class="solr.ItalianLightStemFilterFactory"/> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> - </analyzer> - </fieldType> - - <!-- Japanese using 
morphological analysis (see text_cjk for a configuration using bigramming) - - NOTE: If you want to optimize search for precision, use default operator AND in your request - handler config (q.op) Use OR if you would like to optimize for recall (default). - --> - <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/> - <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> - <analyzer> - <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) - - Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic - is used to segment compounds into its parts and the compound itself is kept as synonym. - - Valid values for attribute mode are: - normal: regular segmentation - search: segmentation useful for search with synonyms compounds (default) - extended: same as search mode, but unigrams unknown words (experimental) - - For some applications it might be good to use search mode for indexing and normal mode for - queries to reduce recall and prevent parts of compounds from being matched and highlighted. - Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. - - Kuromoji also has a convenient user dictionary feature that allows overriding the statistical - model with your own entries for segmentation, part-of-speech tags and readings without a need - to specify weights. Notice that user dictionaries have not been subject to extensive testing. - - User dictionary attributes are: - userDictionary: user dictionary filename - userDictionaryEncoding: user dictionary encoding (default is UTF-8) - - See lang/userdict_ja.txt for a sample user dictionary file. - - Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them. 
- --> - <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> - <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> - <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> - <filter class="solr.JapaneseBaseFormFilterFactory"/> - <!-- Removes tokens with certain part-of-speech tags --> - <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> - <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> - <filter class="solr.CJKWidthFilterFactory"/> - <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> - <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> - <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> - <!-- Lower-cases romaji characters --> - <filter class="solr.LowerCaseFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Korean morphological analysis --> - <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/> - <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer) - The Korean (nori) analyzer integrates Lucene nori analysis module into Solr. - It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts. - - This dictionary was built with MeCab, it defines a format for the features adapted - for the Korean language. - - Nori also has a convenient user dictionary feature that allows overriding the statistical - model with your own entries for segmentation, part-of-speech tags and readings without a need - to specify weights. Notice that user dictionaries have not been subject to extensive testing. 
- - The tokenizer supports multiple schema attributes: - * userDictionary: User dictionary path. - * userDictionaryEncoding: User dictionary encoding. - * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. - * outputUnknownUnigrams: If true outputs unigrams for unknown words. - --> - <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> - <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags', - listing the tags to remove. By default it removes: - E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV - This is basically an equivalent to stemming. - --> - <filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> - <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> - <filter class="solr.KoreanReadingFormFilterFactory" /> - <filter class="solr.LowerCaseFilterFactory" /> - </analyzer> - </fieldType> - - <!-- Latvian --> - <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/> - <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> - <filter class="solr.LatvianStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Dutch --> - <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/> - <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> - <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> - <filter 
class="solr.SnowballPorterFilterFactory" language="Dutch"/> - </analyzer> - </fieldType> - - <!-- Norwegian --> - <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/> - <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> - <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> - <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> --> - <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Portuguese --> - <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> - <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> - <filter class="solr.PortugueseLightStemFilterFactory"/> - <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> - <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> - <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Romanian --> - <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/> - <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> - <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> - </analyzer> - 
</fieldType> - - <!-- Russian --> - <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/> - <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> - <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> - <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Swedish --> - <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/> - <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> - <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> - <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> - </analyzer> - </fieldType> - - <!-- Thai --> - <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/> - <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.ThaiTokenizerFactory"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> - </analyzer> - </fieldType> - - <!-- Turkish --> - <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/> - <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> - <analyzer> - <tokenizer class="solr.StandardTokenizerFactory"/> - <filter class="solr.TurkishLowerCaseFilterFactory"/> - <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> - <filter 
class="solr.SnowballPorterFilterFactory" language="Turkish"/> - </analyzer> - </fieldType> - - <!-- Similarity is the scoring routine for each document vs. a query. - A custom Similarity or SimilarityFactory may be specified here, but - the default is fine for most applications. - For more info: http://lucene.apache.org/solr/guide/other-schema-elements.html#OtherSchemaElements-Similarity - --> - <!-- - <similarity class="com.example.solr.CustomSimilarityFactory"> - <str name="paramkey">param value</str> - </similarity> - --> - - <field name="org-defined-funding-type" type="string" indexed="true" stored="true" - multiValued="false" /> - - <uniqueKey>org-defined-funding-type</uniqueKey> - - <field name="text" type="text_general" indexed="true" stored="false" - multiValued="true" /> - <copyField source="*" dest="text" /> - -</schema> diff --git a/solr-config/cores/fundingSubType/conf/solrconfig.xml b/solr-config/cores/fundingSubType/conf/solrconfig.xml deleted file mode 100644 index 4b1fbb68f1d..00000000000 --- a/solr-config/cores/fundingSubType/conf/solrconfig.xml +++ /dev/null @@ -1,1174 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<!-- - For more details about configurations options that may appear in - this file, see http://wiki.apache.org/solr/SolrConfigXml. ---> -<config> - <!-- In all configuration below, a prefix of "solr." for class names - is an alias that causes solr to search appropriate packages, - including org.apache.solr.(search|update|request|core|analysis) - - You may also specify a fully qualified Java classname if you - have your own custom plugins. - --> - - <!-- Controls what version of Lucene various components of Solr - adhere to. Generally, you want to use the latest version to - get all bug fixes and improvements. It is highly recommended - that you fully re-index after changing this setting as it can - affect both how text is indexed and queried. - --> - <luceneMatchVersion>7.7.1</luceneMatchVersion> - - <!-- <lib/> directives can be used to instruct Solr to load any Jars - identified and use them to resolve any "plugins" specified in - your solrconfig.xml or schema.xml (ie: Analyzers, Request - Handlers, etc...). - - All directories and paths are resolved relative to the - instanceDir. - - Please note that <lib/> directives are processed in the order - that they appear in your solrconfig.xml file, and are "stacked" - on top of each other when building a ClassLoader - so if you have - plugin jars with dependencies on other jars, the "lower level" - dependency jars should be loaded first. - - If a "./lib" directory exists in your instanceDir, all files - found in it are included as if you had used the following - syntax... - - <lib dir="./lib" /> - --> - - <!-- A 'dir' option by itself adds any files found in the directory - to the classpath, this is useful for including all jars in a - directory. - - When a 'regex' is specified in addition to a 'dir', only the - files in that directory which completely match the regex - (anchored on both ends) will be included. 
- - If a 'dir' option (with or without a regex) is used and nothing - is found that matches, a warning will be logged. - - The examples below can be used to load some solr-contribs along - with their external dependencies. - --> - <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> - <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> - - <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> - <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> - - <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> - <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> - - <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> - <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> - <!-- an exact 'path' can be used instead of a 'dir' to specify a - specific jar file. This will cause a serious error to be logged - if it can't be loaded. - --> - <!-- - <lib path="../a-jar-that-does-not-exist.jar" /> - --> - - <!-- Data Directory - - Used to specify an alternate directory to hold all index data - other than the default ./data under the Solr home. If - replication is in use, this should match the replication - configuration. - --> - <dataDir>${solr.data.dir:}</dataDir> - - - <!-- The DirectoryFactory to use for indexes. - - solr.StandardDirectoryFactory is filesystem - based and tries to pick the best implementation for the current - JVM and platform. solr.NRTCachingDirectoryFactory, the default, - wraps solr.StandardDirectoryFactory and caches small files in memory - for better NRT performance. - - One can force a particular implementation via solr.MMapDirectoryFactory, - solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. - - solr.RAMDirectoryFactory is memory based and not persistent. 
- --> - <directoryFactory name="DirectoryFactory" - class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> - - <!-- The CodecFactory for defining the format of the inverted index. - The default implementation is SchemaCodecFactory, which is the official Lucene - index format, but hooks into the schema to provide per-field customization of - the postings lists and per-document values in the fieldType element - (postingsFormat/docValuesFormat). Note that most of the alternative implementations - are experimental, so if you choose to customize the index format, it's a good - idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) - before upgrading to a newer version to avoid unnecessary reindexing. - A "compressionMode" string element can be added to <codecFactory> to choose - between the existing compression modes in the default codec: "BEST_SPEED" (default) - or "BEST_COMPRESSION". - --> - <codecFactory class="solr.SchemaCodecFactory"/> - - <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Index Config - These settings control low-level behavior of indexing - Most example settings here show the default value, but are commented - out, to more easily see where customizations have been made. - - Note: This replaces <indexDefaults> and <mainIndex> from older versions - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> - <indexConfig> - <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a - LimitTokenCountFilterFactory in your fieldType definition. E.g. - <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> - --> - <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> - <writeLockTimeout>1000</writeLockTimeout> - - <!-- Expert: Enabling compound file will use less files for the index, - using fewer file descriptors on the expense of performance decrease. - Default in Lucene is "true". 
Default in Solr is "false" (since 3.6) --> - <useCompoundFile>false</useCompoundFile> - - <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene - indexing for buffering added documents and deletions before they are - flushed to the Directory. - maxBufferedDocs sets a limit on the number of documents buffered - before flushing. - If both ramBufferSizeMB and maxBufferedDocs is set, then - Lucene will flush based on whichever limit is hit first. --> - <ramBufferSizeMB>100</ramBufferSizeMB> - <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> - - <!-- Expert: Merge Policy - The Merge Policy in Lucene controls how merging of segments is done. - The default since Solr/Lucene 3.3 is TieredMergePolicy. - The default since Lucene 2.3 was the LogByteSizeMergePolicy, - Even older versions of Lucene used LogDocMergePolicy. - --> - <!-- - <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory"> - <int name="maxMergeAtOnce">10</int> - <int name="segmentsPerTier">10</int> - <double name="noCFSRatio">0.1</double> - </mergePolicyFactory> - --> - - <!-- Expert: Merge Scheduler - The Merge Scheduler in Lucene controls how merges are - performed. The ConcurrentMergeScheduler (Lucene 2.3 default) - can perform merges in the background using separate threads. - The SerialMergeScheduler (Lucene 2.2 default) does not. - --> - <!-- - <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> - --> - - <!-- LockFactory - - This option specifies which Lucene LockFactory implementation - to use. - - single = SingleInstanceLockFactory - suggested for a - read-only index or when there is no possibility of - another process trying to modify the index. - native = NativeFSLockFactory - uses OS native file locking. - Do not use when multiple solr webapps in the same - JVM are attempting to share a single index. 
- simple = SimpleFSLockFactory - uses a plain file for locking - - Defaults: 'native' is default for Solr3.6 and later, otherwise - 'simple' is the default - - More details on the nuances of each LockFactory... - http://wiki.apache.org/lucene-java/AvailableLockFactories - --> - <lockType>${solr.lock.type:native}</lockType> - - <!-- Commit Deletion Policy - Custom deletion policies can be specified here. The class must - implement org.apache.lucene.index.IndexDeletionPolicy. - - The default Solr IndexDeletionPolicy implementation supports - deleting index commit points on number of commits, age of - commit point and optimized status. - - The latest commit point should always be preserved regardless - of the criteria. - --> - <!-- - <deletionPolicy class="solr.SolrDeletionPolicy"> - --> - <!-- The number of commit points to be kept --> - <!-- <str name="maxCommitsToKeep">1</str> --> - <!-- The number of optimized commit points to be kept --> - <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> - <!-- - Delete all commit points once they have reached the given age. - Supports DateMathParser syntax e.g. - --> - <!-- - <str name="maxCommitAge">30MINUTES</str> - <str name="maxCommitAge">1DAY</str> - --> - <!-- - </deletionPolicy> - --> - - <!-- Lucene Infostream - - To aid in advanced debugging, Lucene provides an "InfoStream" - of detailed information when indexing. - - Setting The value to true will instruct the underlying Lucene - IndexWriter to write its debugging info the specified file - --> - <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> - </indexConfig> - - - <!-- JMX - - This example enables JMX if and only if an existing MBeanServer - is found, use this if you want to configure JMX through JVM - parameters. Remove this to disable exposing Solr configuration - and statistics to JMX. 
- - For more details see http://wiki.apache.org/solr/SolrJmx - --> - <jmx /> - <!-- If you want to connect to a particular server, specify the - agentId - --> - <!-- <jmx agentId="myAgent" /> --> - <!-- If you want to start a new MBeanServer, specify the serviceUrl --> - <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> - --> - - <!-- The default high-performance update handler --> - <updateHandler class="solr.DirectUpdateHandler2"> - - <!-- Enables a transaction log, used for real-time get, durability, and - and solr cloud replica recovery. The log can grow as big as - uncommitted changes to the index, so use of a hard autoCommit - is recommended (see below). - "dir" - the target directory for transaction logs, defaults to the - solr data directory. - "numVersionBuckets" - sets the number of buckets used to keep - track of max version values when checking for re-ordered - updates; increase this value to reduce the cost of - synchronizing access to version buckets during high-volume - indexing, this requires 8 bytes (long) * numVersionBuckets - of heap space per Solr core. - --> - <updateLog> - <str name="dir">${solr.ulog.dir:}</str> - <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int> - </updateLog> - - <!-- AutoCommit - - Perform a hard commit automatically under certain conditions. - Instead of enabling autoCommit, consider using "commitWithin" - when adding documents. - - http://wiki.apache.org/solr/UpdateXmlMessages - - maxDocs - Maximum number of documents to add since the last - commit before automatically triggering a new commit. - - maxTime - Maximum amount of time in ms that is allowed to pass - since a document was added before automatically - triggering a new commit. - openSearcher - if false, the commit causes recent index changes - to be flushed to stable storage, but does not cause a new - searcher to be opened to make those changes visible. 
- - If the updateLog is enabled, then it's highly recommended to - have some sort of hard autoCommit to limit the log size. - --> - <autoCommit> - <maxTime>${solr.autoCommit.funding.maxTime:300000}</maxTime> - <openSearcher>true</openSearcher> - </autoCommit> - - <!-- softAutoCommit is like autoCommit except it causes a - 'soft' commit which only ensures that changes are visible - but does not ensure that data is synced to disk. This is - faster and more near-realtime friendly than a hard commit. - <autoSoftCommit> - <maxTime>${solr.autoSoftCommit.org.maxTime:150000}</maxTime> - </autoSoftCommit> - --> - - <!-- Update Related Event Listeners - - Various IndexWriter related events can trigger Listeners to - take actions. - - postCommit - fired after every commit or optimize command - postOptimize - fired after every optimize command - --> - - </updateHandler> - - <!-- IndexReaderFactory - - Use the following format to specify a custom IndexReaderFactory, - which allows for alternate IndexReader implementations. - - ** Experimental Feature ** - - Please note - Using a custom IndexReaderFactory may prevent - certain other features from working. The API to - IndexReaderFactory may change without warning or may even be - removed from future releases if the problems cannot be - resolved. - - - ** Features that may not work with custom IndexReaderFactory ** - - The ReplicationHandler assumes a disk-resident index. Using a - custom IndexReader implementation may cause incompatibility - with ReplicationHandler and may cause replication to not work - correctly. See SOLR-1366 for details. 
- - --> - <!-- - <indexReaderFactory name="IndexReaderFactory" class="package.class"> - <str name="someArg">Some Value</str> - </indexReaderFactory > - --> - - <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Query section - these settings control query time things like caches - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> - <query> - - <!-- Maximum number of clauses in each BooleanQuery, an exception - is thrown if exceeded. It is safe to increase or remove this setting, - since it is purely an arbitrary limit to try and catch user errors where - large boolean queries may not be the best implementation choice. - --> - <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> - - <!-- Solr Internal Query Caches - - There are two implementations of cache available for Solr, - LRUCache, based on a synchronized LinkedHashMap, and - FastLRUCache, based on a ConcurrentHashMap. - - FastLRUCache has faster gets and slower puts in single - threaded operation and thus is generally faster than LRUCache - when the hit ratio of the cache is high (> 75%), and may be - faster under other scenarios on multi-cpu systems. - --> - - <!-- Filter Cache - - Cache used by SolrIndexSearcher for filters (DocSets), - unordered sets of *all* documents that match a query. When a - new searcher is opened, its caches may be prepopulated or - "autowarmed" using data from caches in the old searcher. - autowarmCount is the number of items to prepopulate. For - LRUCache, the autowarmed items will be the most recently - accessed items. - - Parameters: - class - the SolrCache implementation LRUCache or - (LRUCache or FastLRUCache) - size - the maximum number of entries in the cache - initialSize - the initial capacity (number of entries) of - the cache. (see java.util.HashMap) - autowarmCount - the number of entries to prepopulate from - and old cache. 
- maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed - to occupy. Note that when this option is specified, the size - and initialSize parameters are ignored. - --> - <filterCache class="solr.FastLRUCache" - size="${solr.orcid.filterCache.size:1024}" - initialSize="${solr.orcid.filterCache.initialSize:512}" - autowarmCount="${solr.orcid.filterCache.autowarmCount:0}"/> - - <!-- Query Result Cache - - Caches results of searches - ordered lists of document ids - (DocList) based on a query, a sort, and the range of documents requested. - Additional supported parameter by LRUCache: - maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed - to occupy - --> - <queryResultCache class="solr.LRUCache" - size="${solr.orcid.queryResultCache.size:1024}" - initialSize="${solr.orcid.queryResultCache.initialSize:512}" - autowarmCount="${solr.orcid.queryResultCache.autowarmCount:0}"/> - - <!-- Document Cache - - Caches Lucene Document objects (the stored fields for each - document). Since Lucene internal document ids are transient, - this cache will not be autowarmed. - --> - <documentCache class="solr.LRUCache" - size="${solr.orcid.documentCache.size:1024}" - initialSize="${solr.orcid.documentCache.initialSize:512}" - autowarmCount="${solr.orcid.documentCache.autowarmCount:0}"/> - - <!-- custom cache currently used by block join --> - <cache name="perSegFilter" - class="solr.search.LRUCache" - size="10" - initialSize="0" - autowarmCount="10" - regenerator="solr.NoOpRegenerator" /> - - <!-- Field Value Cache - - Cache used to hold field values that are quickly accessible - by document id. The fieldValueCache is created by default - even if not configured here. - --> - <!-- - <fieldValueCache class="solr.FastLRUCache" - size="512" - autowarmCount="128" - showItems="32" /> - --> - - <!-- Custom Cache - - Example of a generic cache. These caches may be accessed by - name through SolrIndexSearcher.getCache(),cacheLookup(), and - cacheInsert(). 
The purpose is to enable easy caching of - user/application level data. The regenerator argument should - be specified as an implementation of solr.CacheRegenerator - if autowarming is desired. - --> - <!-- - <cache name="myUserCache" - class="solr.LRUCache" - size="4096" - initialSize="1024" - autowarmCount="1024" - regenerator="com.mycompany.MyRegenerator" - /> - --> - - - <!-- Lazy Field Loading - - If true, stored fields that are not requested will be loaded - lazily. This can result in a significant speed improvement - if the usual case is to not load all stored fields, - especially if the skipped fields are large compressed text - fields. - --> - <enableLazyFieldLoading>true</enableLazyFieldLoading> - - <!-- Use Filter For Sorted Query - - A possible optimization that attempts to use a filter to - satisfy a search. If the requested sort does not include - score, then the filterCache will be checked for a filter - matching the query. If found, the filter will be used as the - source of document ids, and then the sort will be applied to - that. - - For most situations, this will not be useful unless you - frequently get the same search repeatedly with different sort - options, and none of them ever use "score" - --> - <!-- - <useFilterForSortedQuery>true</useFilterForSortedQuery> - --> - - <!-- Result Window Size - - An optimization for use with the queryResultCache. When a search - is requested, a superset of the requested number of document ids - are collected. For example, if a search for a particular query - requests matching documents 10 through 19, and queryWindowSize is 50, - then documents 0 through 49 will be collected and cached. Any further - requests in that range can be satisfied via the cache. - --> - <queryResultWindowSize>${solr.orcid.queryResultWindowSize:100}</queryResultWindowSize> - - <!-- Maximum number of documents to cache for any entry in the - queryResultCache. 
- --> - <queryResultMaxDocsCached>${solr.orcid.queryResultMaxDocsCached:500}</queryResultMaxDocsCached> - - <!-- Query Related Event Listeners - - Various IndexSearcher related events can trigger Listeners to - take actions. - - newSearcher - fired whenever a new searcher is being prepared - and there is a current searcher handling requests (aka - registered). It can be used to prime certain caches to - prevent long request times for certain requests. - - firstSearcher - fired whenever a new searcher is being - prepared but there is no current registered searcher to handle - requests or to gain autowarming data from. - - - --> - <!-- QuerySenderListener takes an array of NamedList and executes a - local query request for each NamedList in sequence. - --> - <listener event="newSearcher" class="solr.QuerySenderListener"> - <arr name="queries"> - <!-- - <lst><str name="q">solr</str><str name="sort">price asc</str></lst> - <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> - --> - </arr> - </listener> - <listener event="firstSearcher" class="solr.QuerySenderListener"> - <arr name="queries"> - <!-- - <lst> - <str name="q">static firstSearcher warming in solrconfig.xml</str> - </lst> - --> - </arr> - </listener> - - <!-- Use Cold Searcher - - If a search request comes in and there is no current - registered searcher, then immediately register the still - warming searcher and use it. If "false" then all requests - will block until the first searcher is done warming. - --> - <useColdSearcher>false</useColdSearcher> - - <maxWarmingSearchers>${solr.orcid.maxWarmingSearchers:6}</maxWarmingSearchers> - - </query> - - - <!-- Request Dispatcher - - This section contains instructions for how the SolrDispatchFilter - should behave when processing requests for this SolrCore. 
- - --> - <requestDispatcher> - <!-- Request Parsing - - These settings indicate how Solr Requests may be parsed, and - what restrictions may be placed on the ContentStreams from - those requests - - enableRemoteStreaming - enables use of the stream.file - and stream.url parameters for specifying remote streams. - - multipartUploadLimitInKB - specifies the max size (in KiB) of - Multipart File Uploads that Solr will allow in a Request. - - formdataUploadLimitInKB - specifies the max size (in KiB) of - form data (application/x-www-form-urlencoded) sent via - POST. You can use POST to pass request parameters not - fitting into the URL. - - addHttpRequestToContext - if set to true, it will instruct - the requestParsers to include the original HttpServletRequest - object in the context map of the SolrQueryRequest under the - key "httpRequest". It will not be used by any of the existing - Solr components, but may be useful when developing custom - plugins. - - *** WARNING *** - Before enabling remote streaming, you should make sure your - system has authentication enabled. - --> - <requestParsers enableRemoteStreaming="true" - multipartUploadLimitInKB="2048000" /> - - <!-- HTTP Caching - - Set HTTP caching related parameters (for proxy caches and clients). - - The options below instruct Solr not to output any HTTP Caching - related headers - --> - <httpCaching never304="true" /> - <!-- If you include a <cacheControl> directive, it will be used to - generate a Cache-Control header (as well as an Expires header - if the value contains "max-age=") - - By default, no Cache-Control header is generated. 
- - You can use the <cacheControl> option even if you have set - never304="true" - --> - <!-- - <httpCaching never304="true" > - <cacheControl>max-age=30, public</cacheControl> - </httpCaching> - --> - <!-- To enable Solr to respond with automatically generated HTTP - Caching headers, and to response to Cache Validation requests - correctly, set the value of never304="false" - - This will cause Solr to generate Last-Modified and ETag - headers based on the properties of the Index. - - The following options can also be specified to affect the - values of these headers... - - lastModFrom - the default value is "openTime" which means the - Last-Modified value (and validation against If-Modified-Since - requests) will all be relative to when the current Searcher - was opened. You can change it to lastModFrom="dirLastMod" if - you want the value to exactly correspond to when the physical - index was last modified. - - etagSeed="..." is an option you can change to force the ETag - header (and validation against If-None-Match requests) to be - different even if the index has not changed (ie: when making - significant changes to your config file) - - (lastModifiedFrom and etagSeed are both ignored if you use - the never304="true" option) - --> - <!-- - <httpCaching lastModifiedFrom="openTime" - etagSeed="Solr"> - <cacheControl>max-age=30, public</cacheControl> - </httpCaching> - --> - </requestDispatcher> - - <!-- Request Handlers - - http://wiki.apache.org/solr/SolrRequestHandler - - Incoming queries will be dispatched to a specific handler by name - based on the path specified in the request. - - If a Request Handler is declared with startup="lazy", then it will - not be initialized until the first request that uses it. 
- - --> - <!-- SearchHandler - - http://wiki.apache.org/solr/SearchHandler - - For processing Search Queries, the primary Request Handler - provided with Solr is "SearchHandler" It delegates to a sequent - of SearchComponents (see below) and supports distributed - queries across multiple shards - --> - <requestHandler name="/select" class="solr.SearchHandler"> - <!-- default values for query parameters can be specified, these - will be overridden by parameters in the request - --> - <lst name="defaults"> - <str name="echoParams">explicit</str> - <int name="rows">10</int> - <str name="wt">xml</str> - <!-- Default search field --> - <str name="df">text</str> - </lst> - <!-- In addition to defaults, "appends" params can be specified - to identify values which should be appended to the list of - multi-val params from the query (or the existing "defaults"). - --> - <!-- In this example, the param "fq=instock:true" would be appended to - any query time fq params the user may specify, as a mechanism for - partitioning the index, independent of any user selected filtering - that may also be desired (perhaps as a result of faceted searching). - - NOTE: there is *absolutely* nothing a client can do to prevent these - "appends" values from being used, so don't use this mechanism - unless you are sure you always want it. - --> - <!-- - <lst name="appends"> - <str name="fq">inStock:true</str> - </lst> - --> - <!-- "invariants" are a way of letting the Solr maintainer lock down - the options available to Solr clients. Any params values - specified here are used regardless of what values may be specified - in either the query, the "defaults", or the "appends" params. - - In this example, the facet.field and facet.query params would - be fixed, limiting the facets clients can use. 
Faceting is - not turned on by default - but if the client does specify - facet=true in the request, these are the only facets they - will be able to see counts for; regardless of what other - facet.field or facet.query params they may specify. - - NOTE: there is *absolutely* nothing a client can do to prevent these - "invariants" values from being used, so don't use this mechanism - unless you are sure you always want it. - --> - <!-- - <lst name="invariants"> - <str name="facet.field">cat</str> - <str name="facet.field">manu_exact</str> - <str name="facet.query">price:[* TO 500]</str> - <str name="facet.query">price:[500 TO *]</str> - </lst> - --> - <!-- If the default list of SearchComponents is not desired, that - list can either be overridden completely, or components can be - prepended or appended to the default list. (see below) - --> - <!-- - <arr name="components"> - <str>nameOfCustomComponent1</str> - <str>nameOfCustomComponent2</str> - </arr> - --> - </requestHandler> - - <!-- A request handler that returns indented JSON by default --> - <requestHandler name="/query" class="solr.SearchHandler"> - <lst name="defaults"> - <str name="echoParams">explicit</str> - <str name="wt">json</str> - <str name="indent">true</str> - </lst> - </requestHandler> - - - <!-- A Robust Example - - This example SearchHandler declaration shows off usage of the - SearchHandler with many defaults declared - - Note that multiple instances of the same Request Handler - (SearchHandler) can be registered multiple times with different - names (and different init parameters) - --> - <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse"> - <lst name="defaults"> - <str name="echoParams">explicit</str> - </lst> - </requestHandler> - - <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> - <lst name="defaults"> - <str name="df">_text_</str> - </lst> - </initParams> - - <!-- Solr Cell Update Request Handler - - 
http://wiki.apache.org/solr/ExtractingRequestHandler - - --> - <requestHandler name="/update/extract" - startup="lazy" - class="solr.extraction.ExtractingRequestHandler" > - <lst name="defaults"> - <str name="lowernames">true</str> - <str name="fmap.meta">ignored_</str> - <str name="fmap.content">_text_</str> - </lst> - </requestHandler> - - <!-- Search Components - - Search components are registered to SolrCore and used by - instances of SearchHandler (which can access them by name) - - By default, the following components are available: - - <searchComponent name="query" class="solr.QueryComponent" /> - <searchComponent name="facet" class="solr.FacetComponent" /> - <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> - <searchComponent name="highlight" class="solr.HighlightComponent" /> - <searchComponent name="stats" class="solr.StatsComponent" /> - <searchComponent name="debug" class="solr.DebugComponent" /> - - Default configuration in a requestHandler would look like: - - <arr name="components"> - <str>query</str> - <str>facet</str> - <str>mlt</str> - <str>highlight</str> - <str>stats</str> - <str>debug</str> - </arr> - - If you register a searchComponent to one of the standard names, - that will be used instead of the default. - - To insert components before or after the 'standard' components, use: - - <arr name="first-components"> - <str>myFirstComponentName</str> - </arr> - - <arr name="last-components"> - <str>myLastComponentName</str> - </arr> - - NOTE: The component registered with the name "debug" will - always be executed after the "last-components" - - --> - - <!-- Spell Check - - The spell check component can return a list of alternative spelling - suggestions. 
- - http://wiki.apache.org/solr/SpellCheckComponent - --> - <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> - - <str name="queryAnalyzerFieldType">text_general</str> - - <!-- Multiple "Spell Checkers" can be declared and used by this - component - --> - - <!-- a spellchecker built from a field of the main index --> - <lst name="spellchecker"> - <str name="name">default</str> - <str name="field">_text_</str> - <str name="classname">solr.DirectSolrSpellChecker</str> - <!-- the spellcheck distance measure used, the default is the internal levenshtein --> - <str name="distanceMeasure">internal</str> - <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> - <float name="accuracy">0.5</float> - <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> - <int name="maxEdits">2</int> - <!-- the minimum shared prefix when enumerating terms --> - <int name="minPrefix">1</int> - <!-- maximum number of inspections per result. --> - <int name="maxInspections">5</int> - <!-- minimum length of a query term to be considered for correction --> - <int name="minQueryLength">4</int> - <!-- maximum threshold of documents a query term can appear to be considered for correction --> - <float name="maxQueryFrequency">0.01</float> - <!-- uncomment this to require suggestions to occur in 1% of the documents - <float name="thresholdTokenFrequency">.01</float> - --> - </lst> - - <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> - <!-- - <lst name="spellchecker"> - <str name="name">wordbreak</str> - <str name="classname">solr.WordBreakSolrSpellChecker</str> - <str name="field">name</str> - <str name="combineWords">true</str> - <str name="breakWords">true</str> - <int name="maxChanges">10</int> - </lst> - --> - </searchComponent> - - <!-- A request handler for demonstrating the spellcheck component. - - NOTE: This is purely as an example. 
The whole purpose of the - SpellCheckComponent is to hook it into the request handler that - handles your normal user queries so that a separate request is - not needed to get suggestions. - - IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS - NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! - - See http://wiki.apache.org/solr/SpellCheckComponent for details - on the request parameters. - --> - <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> - <lst name="defaults"> - <!-- Solr will use suggestions from both the 'default' spellchecker - and from the 'wordbreak' spellchecker and combine them. - collations (re-written queries) can include a combination of - corrections from both spellcheckers --> - <str name="spellcheck.dictionary">default</str> - <str name="spellcheck">on</str> - <str name="spellcheck.extendedResults">true</str> - <str name="spellcheck.count">10</str> - <str name="spellcheck.alternativeTermCount">5</str> - <str name="spellcheck.maxResultsForSuggest">5</str> - <str name="spellcheck.collate">true</str> - <str name="spellcheck.collateExtendedResults">true</str> - <str name="spellcheck.maxCollationTries">10</str> - <str name="spellcheck.maxCollations">5</str> - </lst> - <arr name="last-components"> - <str>spellcheck</str> - </arr> - </requestHandler> - - <!-- Term Vector Component - - http://wiki.apache.org/solr/TermVectorComponent - --> - <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> - - <!-- A request handler for demonstrating the term vector component - - This is purely as an example. - - In reality you will likely want to add the component to your - already specified request handlers. - --> - <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> - <lst name="defaults"> - <bool name="tv">true</bool> - </lst> - <arr name="last-components"> - <str>tvComponent</str> - </arr> - </requestHandler> - - <!-- Clustering Component. (Omitted here. 
See the default Solr example for a typical configuration.) --> - - <!-- Terms Component - - http://wiki.apache.org/solr/TermsComponent - - A component to return terms and document frequency of those - terms - --> - <searchComponent name="terms" class="solr.TermsComponent"/> - - <!-- A request handler for demonstrating the terms component --> - <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> - <lst name="defaults"> - <bool name="terms">true</bool> - <bool name="distrib">false</bool> - </lst> - <arr name="components"> - <str>terms</str> - </arr> - </requestHandler> - - - <!-- Query Elevation Component - - http://wiki.apache.org/solr/QueryElevationComponent - - a search component that enables you to configure the top - results for a given query regardless of the normal lucene - scoring. - --> - <searchComponent name="elevator" class="solr.QueryElevationComponent" > - <!-- pick a fieldType to analyze queries --> - <str name="queryFieldType">string</str> - </searchComponent> - - <!-- A request handler for demonstrating the elevator component --> - <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> - <lst name="defaults"> - <str name="echoParams">explicit</str> - </lst> - <arr name="last-components"> - <str>elevator</str> - </arr> - </requestHandler> - - <!-- Highlighting Component - - http://wiki.apache.org/solr/HighlightingParameters - --> - <searchComponent class="solr.HighlightComponent" name="highlight"> - <highlighting> - <!-- Configure the standard fragmenter --> - <!-- This could most likely be commented out in the "default" case --> - <fragmenter name="gap" - default="true" - class="solr.highlight.GapFragmenter"> - <lst name="defaults"> - <int name="hl.fragsize">100</int> - </lst> - </fragmenter> - - <!-- A regular-expression-based fragmenter - (for sentence extraction) - --> - <fragmenter name="regex" - class="solr.highlight.RegexFragmenter"> - <lst name="defaults"> - <!-- slightly smaller fragsizes work better 
because of slop --> - <int name="hl.fragsize">70</int> - <!-- allow 50% slop on fragment sizes --> - <float name="hl.regex.slop">0.5</float> - <!-- a basic sentence pattern --> - <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> - </lst> - </fragmenter> - - <!-- Configure the standard formatter --> - <formatter name="html" - default="true" - class="solr.highlight.HtmlFormatter"> - <lst name="defaults"> - <str name="hl.simple.pre"><![CDATA[<em>]]></str> - <str name="hl.simple.post"><![CDATA[</em>]]></str> - </lst> - </formatter> - - <!-- Configure the standard encoder --> - <encoder name="html" - class="solr.highlight.HtmlEncoder" /> - - <!-- Configure the standard fragListBuilder --> - <fragListBuilder name="simple" - class="solr.highlight.SimpleFragListBuilder"/> - - <!-- Configure the single fragListBuilder --> - <fragListBuilder name="single" - class="solr.highlight.SingleFragListBuilder"/> - - <!-- Configure the weighted fragListBuilder --> - <fragListBuilder name="weighted" - default="true" - class="solr.highlight.WeightedFragListBuilder"/> - - <!-- default tag FragmentsBuilder --> - <fragmentsBuilder name="default" - default="true" - class="solr.highlight.ScoreOrderFragmentsBuilder"> - <!-- - <lst name="defaults"> - <str name="hl.multiValuedSeparatorChar">/</str> - </lst> - --> - </fragmentsBuilder> - - <!-- multi-colored tag FragmentsBuilder --> - <fragmentsBuilder name="colored" - class="solr.highlight.ScoreOrderFragmentsBuilder"> - <lst name="defaults"> - <str name="hl.tag.pre"><![CDATA[ - <b style="background:yellow">,<b style="background:lawgreen">, - <b style="background:aquamarine">,<b style="background:magenta">, - <b style="background:palegreen">,<b style="background:coral">, - <b style="background:wheat">,<b style="background:khaki">, - <b style="background:lime">,<b style="background:deepskyblue">]]></str> - <str name="hl.tag.post"><![CDATA[</b>]]></str> - </lst> - </fragmentsBuilder> - - <boundaryScanner name="default" - default="true" - 
class="solr.highlight.SimpleBoundaryScanner"> - <lst name="defaults"> - <str name="hl.bs.maxScan">10</str> - <str name="hl.bs.chars">.,!? 	 </str> - </lst> - </boundaryScanner> - - <boundaryScanner name="breakIterator" - class="solr.highlight.BreakIteratorBoundaryScanner"> - <lst name="defaults"> - <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> - <str name="hl.bs.type">WORD</str> - <!-- language and country are used when constructing Locale object. --> - <!-- And the Locale object will be used when getting instance of BreakIterator --> - <str name="hl.bs.language">en</str> - <str name="hl.bs.country">US</str> - </lst> - </boundaryScanner> - </highlighting> - </searchComponent> - - - - <!-- Response Writers - - http://wiki.apache.org/solr/QueryResponseWriter - - Request responses will be written using the writer specified by - the 'wt' request parameter matching the name of a registered - writer. - - The "default" writer is the default and will be used if 'wt' is - not specified in the request. - --> - <!-- The following response writers are implicitly configured unless - overridden... - --> - <!-- - <queryResponseWriter name="xml" - default="true" - class="solr.XMLResponseWriter" /> - <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> - <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> - <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> - <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> - <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> - <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> - <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/> - --> - - <queryResponseWriter name="json" class="solr.JSONResponseWriter"> - <!-- For the purposes of the tutorial, JSON responses are written as - plain text so that they are easy to read in *any* browser. 
- If you expect a MIME type of "application/json" just remove this override. - --> - <str name="content-type">text/plain; charset=UTF-8</str> - </queryResponseWriter> - - <!-- - Custom response writers can be declared as needed... - --> - <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"> - <str name="template.base.dir">${velocity.template.base.dir:}</str> - <str name="solr.resource.loader.enabled">${velocity.solr.resource.loader.enabled:true}</str> - <str name="params.resource.loader.enabled">${velocity.params.resource.loader.enabled:false}</str> - </queryResponseWriter> - - <!-- XSLT response writer transforms the XML output by any xslt file found - in Solr's conf/xslt directory. Changes to xslt files are checked for - every xsltCacheLifetimeSeconds. - --> - <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> - <int name="xsltCacheLifetimeSeconds">5</int> - </queryResponseWriter> - - <!-- Uncomment this if for some reason we don't want to use the managed schema anymore - <schemaFactory class="ClassicIndexSchemaFactory"/> - --> - - <requestHandler name="/replication" class="solr.ReplicationHandler" > - <lst name="leader"> - <str name="enable">${solr.enable.leader:true}</str> - <str name="replicateAfter">startup</str> - <str name="replicateAfter">commit</str> - <str name="confFiles">schema.xml,stopwords.txt</str> - </lst> - <lst name="follower"> - <str name="enable">${solr.enable.follower:false}</str> - <str name="leaderUrl">${solr.leader.url:http://localhost:8983/solr}/${solr.core.name}</str> - <str name="pollInterval">${solr.poll.interval:00:00:60}</str> - </lst> - </requestHandler> - -</config> diff --git a/solr-config/cores/fundingSubType/conf/stopwords.txt b/solr-config/cores/fundingSubType/conf/stopwords.txt deleted file mode 100644 index 09697dce6e1..00000000000 --- a/solr-config/cores/fundingSubType/conf/stopwords.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under 
one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/solr-config/cores/fundingSubType/conf/synonyms.txt b/solr-config/cores/fundingSubType/conf/synonyms.txt deleted file mode 100644 index eab4ee87537..00000000000 --- a/solr-config/cores/fundingSubType/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr-config/cores/fundingSubType/core.properties b/solr-config/cores/fundingSubType/core.properties deleted file mode 100644 index 2235f5f5230..00000000000 --- a/solr-config/cores/fundingSubType/core.properties +++ /dev/null @@ -1,2 +0,0 @@ -name=fundingSubType -dataDir=/opt/solr/solr_data/fundingSubType \ No newline at end of file