Skip to content

Commit

Permalink
Merge pull request #10706 from QualitativeDataRepository/QDR-refactor…
Browse files Browse the repository at this point in the history
…_avoidJoin_in_indexing

IQSS/10705: Refactor permission filter query in indexing
  • Loading branch information
landreev authored Oct 24, 2024
2 parents c369517 + 86ea279 commit 03538f0
Showing 1 changed file with 65 additions and 198 deletions.
263 changes: 65 additions & 198 deletions src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import jakarta.inject.Inject;
import jakarta.inject.Named;
import jakarta.persistence.NoResultException;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.SolrServerException;
Expand All @@ -52,6 +54,8 @@ public class SearchServiceBean {

private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName());

private static final String ALL_GROUPS = "*";

/**
* We're trying to make the SearchServiceBean lean, mean, and fast, with as
* few injections of EJBs as possible.
Expand Down Expand Up @@ -182,6 +186,7 @@ public SolrQueryResponse search(

SolrQuery solrQuery = new SolrQuery();
query = SearchUtil.sanitizeQuery(query);

solrQuery.setQuery(query);
if (sortField != null) {
// is it ok not to specify any sort? - there are cases where we
Expand Down Expand Up @@ -323,24 +328,13 @@ public SolrQueryResponse search(
}
}

//I'm not sure if just adding null here is good for hte permissions system... i think it needs something
if(dataverses != null) {
for(Dataverse dataverse : dataverses) {
// -----------------------------------
// PERMISSION FILTER QUERY
// -----------------------------------
String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets);
if (permissionFilterQuery != null) {
solrQuery.addFilterQuery(permissionFilterQuery);
}
}
} else {
String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets);
if (permissionFilterQuery != null) {
solrQuery.addFilterQuery(permissionFilterQuery);
}
// -----------------------------------
// PERMISSION FILTER QUERY
// -----------------------------------
String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, onlyDatatRelatedToMe, addFacets);
if (permissionFilterQuery != null) {
solrQuery.addFilterQuery(permissionFilterQuery);
}


/**
* @todo: do sanity checking... throw error if negative
Expand Down Expand Up @@ -994,7 +988,7 @@ public String getCapitalizedName(String name) {
*
* @return
*/
private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) {
private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, boolean onlyDatatRelatedToMe, boolean addFacets) {

User user = dataverseRequest.getUser();
if (user == null) {
Expand All @@ -1003,226 +997,99 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ
if (solrQuery == null) {
throw new NullPointerException("solrQuery cannot be null");
}
/**
* @todo For people who are not logged in, should we show stuff indexed
* with "AllUsers" group or not? If so, uncomment the allUsersString
* stuff below.
*/
// String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias();
// String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")";
String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")";
// String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString();
// initialize to public only to be safe
String dangerZoneNoSolrJoin = null;


if (user instanceof PrivateUrlUser) {
user = GuestUser.get();
}

AuthenticatedUser au = null;
ArrayList<String> groupList = new ArrayList<String>();
AuthenticatedUser au = null;
Set<Group> groups;

if (user instanceof GuestUser) {
// Yes, GuestUser may be part of one or more groups; such as IP Groups.
groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
} else {
if (!(user instanceof AuthenticatedUser)) {
logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest");
throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest");
}

boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled();

if (user instanceof AuthenticatedUser) {
au = (AuthenticatedUser) user;

// ----------------------------------------------------
// (3) Is this a Super User?
// Is this a Super User?
// If so, they can see everything
// ----------------------------------------------------
if (au.isSuperuser()) {
// Somewhat dangerous because this user (a superuser) will be able
// to see everything in Solr with no regard to permissions. But it's
// been this way since Dataverse 4.0. So relax. :)

return dangerZoneNoSolrJoin;
return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS);
}

// ----------------------------------------------------
// (4) User is logged in AND onlyDatatRelatedToMe == true
// User is logged in AND onlyDatatRelatedToMe == true
// Yes, give back everything -> the settings will be in
// the filterqueries given to search
// the filterqueries given to search
// ----------------------------------------------------
if (onlyDatatRelatedToMe == true) {
if (systemConfig.myDataDoesNotUsePermissionDocs()) {
logger.fine("old 4.2 behavior: MyData is not using Solr permission docs");
return dangerZoneNoSolrJoin;
return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS);
} else {
// fall-through
logger.fine("new post-4.2 behavior: MyData is using Solr permission docs");
}
}

// ----------------------------------------------------
// (5) Work with Authenticated User who is not a Superuser
// Work with Authenticated User who is not a Superuser
// ----------------------------------------------------

groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
groupList.add(IndexServiceBean.getGroupPerUserPrefix() + au.getId());
}

if (FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled()) {
/**
* Instead of doing a super expensive join, we will rely on the
* new boolean field PublicObject:true for public objects. This field
* is indexed on the content document itself, rather than a permission
* document. An additional join will be added only for any extra,
* more restricted groups that the user may be part of.
* **Note the experimental nature of this optimization**.
*/
StringBuilder sb = new StringBuilder();
StringBuilder sbgroups = new StringBuilder();

// All users, guests and authenticated, should see all the
// documents marked as publicObject_b:true, at least:
sb.append(SearchFields.PUBLIC_OBJECT + ":" + true);
// In addition to the user referenced directly, we will also
// add joins on all the non-public groups that may exist for the
// user:

// One or more groups *may* also be available for this user. Once again,
// do note that Guest users may be part of some groups, such as
// IP groups.

int groupCounter = 0;
// Authenticated users and GuestUser may be part of one or more groups; such
// as IP Groups.
groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));

// An AuthenticatedUser should also be able to see all the content
// on which they have direct permissions:
if (au != null) {
groupCounter++;
sbgroups.append(IndexServiceBean.getGroupPerUserPrefix() + au.getId());
}

// In addition to the user referenced directly, we will also
// add joins on all the non-public groups that may exist for the
// user:
for (Group group : groups) {
String groupAlias = group.getAlias();
if (groupAlias != null && !groupAlias.isEmpty() && !groupAlias.startsWith("builtIn")) {
groupCounter++;
if (groupCounter > 1) {
sbgroups.append(" OR ");
}
sbgroups.append(IndexServiceBean.getGroupPrefix() + groupAlias);
}
}

if (groupCounter > 1) {
// If there is more than one group, the parentheses must be added:
sbgroups.insert(0, "(");
sbgroups.append(")");
}

if (groupCounter > 0) {
// If there are any groups for this user, an extra join must be
// added to the query, and the extra sub-query must be added to
// the combined Solr query:
sb.append(" OR {!join from=" + SearchFields.DEFINITION_POINT + " to=id v=$q1}");
// Add the subquery to the combined Solr query:
solrQuery.setParam("q1", SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString());
logger.info("The sub-query q1 set to " + SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString());
}

String ret = sb.toString();
logger.fine("Returning experimental query: " + ret);
return ret;
}

// END OF EXPERIMENTAL OPTIMIZATION

// Old, un-optimized way of handling permissions.
// Largely left intact, minus the lookups that have already been performed
// above.

// ----------------------------------------------------
// (1) Is this a GuestUser?
// ----------------------------------------------------
if (user instanceof GuestUser) {

StringBuilder sb = new StringBuilder();

String groupsFromProviders = "";
for (Group group : groups) {
logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
String groupAlias = group.getAlias();
if (groupAlias != null && !groupAlias.isEmpty()) {
sb.append(" OR ");
// i.e. group_builtIn/all-users, ip/ipGroup3
sb.append(IndexServiceBean.getGroupPrefix()).append(groupAlias);
}
}
groupsFromProviders = sb.toString();
logger.fine("groupsFromProviders:" + groupsFromProviders);
String guestWithGroups = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + groupsFromProviders + ")";
logger.fine(guestWithGroups);
return guestWithGroups;
}

// ----------------------------------------------------
// (5) Work with Authenticated User who is not a Superuser
// ----------------------------------------------------
// It was already confirmed, that if the user is not GuestUser, we
// have an AuthenticatedUser au which is not null.
/**
* @todo all this code needs cleanup and clarification.
*/
/**
* Every AuthenticatedUser is part of a "User Private Group" (UGP), a
* concept we borrow from RHEL:
* https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups
*/
/**
* @todo rename this from publicPlusUserPrivateGroup. Confusing
*/
// safe default: public only
String publicPlusUserPrivateGroup = publicOnly;
// + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR "))
// + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")";

// /**
// * @todo add onlyDatatRelatedToMe option into the experimental JOIN
// * before enabling it.
// */
/**
* From a search perspective, we don't care about if the group was
* created within one dataverse or another. We just want a list of *all*
* the groups the user is part of. We are greedy. We want all BuiltIn
* Groups, Shibboleth Groups, IP Groups, "system" groups, everything.
*
* A JOIN on "permission documents" will determine if the user can find
* a given "content document" (dataset version, etc) in Solr.
*/
String groupsFromProviders = "";
StringBuilder sb = new StringBuilder();
for (Group group : groups) {
logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
String groupAlias = group.getAlias();
if (groupAlias != null && !groupAlias.isEmpty()) {
sb.append(" OR ");
// i.e. group_builtIn/all-users, group_builtIn/authenticated-users, group_1-explictGroup1, group_shib/2
sb.append(IndexServiceBean.getGroupPrefix() + groupAlias);
if (groupAlias != null && !groupAlias.isEmpty() && (!avoidJoin || !groupAlias.startsWith("builtIn"))) {
groupList.add(IndexServiceBean.getGroupPrefix() + groupAlias);
}
}
groupsFromProviders = sb.toString();

logger.fine(groupsFromProviders);
if (true) {
/**
* @todo get rid of "experimental" in name
*/
String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")";
publicPlusUserPrivateGroup = experimentalJoin;
if (!avoidJoin) {
// Add the public group
groupList.add(0, IndexServiceBean.getPublicGroupString());
}

String groupString = null;
//If we have additional groups, format them correctly into a search string, with parens if there is more than one
if (groupList.size() > 1) {
groupString = "(" + StringUtils.join(groupList, " OR ") + ")";
} else if (groupList.size() == 1) {
groupString = groupList.get(0);
}

//permissionFilterQuery = publicPlusUserPrivateGroup;
logger.fine(publicPlusUserPrivateGroup);

return publicPlusUserPrivateGroup;

logger.fine("Groups: " + groupString);
String permissionQuery = buildPermissionFilterQuery(avoidJoin, groupString);
logger.fine("Permission Query: " + permissionQuery);
return permissionQuery;
}

private String buildPermissionFilterQuery(boolean avoidJoin, String permissionFilterGroups) {
String query = (avoidJoin&& !isAllGroups(permissionFilterGroups)) ? SearchFields.PUBLIC_OBJECT + ":" + true : "";
if (permissionFilterGroups != null && !isAllGroups(permissionFilterGroups)) {
if (!query.isEmpty()) {
query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups + ")";
} else {
query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups;
}
}
return query;
}

private boolean isAllGroups(String groups) {
return (groups!=null &&groups.equals(ALL_GROUPS));
}
}

0 comments on commit 03538f0

Please sign in to comment.