Skip to content

Commit e9a1bed

Browse files
authored
Merge pull request #1797 from 4dn-dcic/facet_grouping_not_facet
Facet Terms Grouping
2 parents aa93a77 + 06180fb commit e9a1bed

File tree

8 files changed

+1088
-746
lines changed

8 files changed

+1088
-746
lines changed

CHANGELOG.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ fourfront
66
Change Log
77
----------
88

9+
5.3.8
10+
=====
11+
12+
`PR Facet terms grouping <https://github.com/4dn-dcic/fourfront/pull/1797>`_
13+
14+
* The update allows for a hierarchical display of terms grouped under a grouping term. It also enables searching and selection by group names and individual terms.
15+
* To enable grouping for a facet, add the group_by_field property to that facet's definition in the schema JSON. In this PR, the terms of ExperimentSet's Experiment Type facet are grouped.
16+
17+
918
5.3.7
1019
=====
1120

package-lock.json

Lines changed: 937 additions & 735 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
"dependencies": {
108108
"@fortawesome/fontawesome-free": "^5.15.4",
109109
"@hms-dbmi-bgm/react-workflow-viz": "0.1.7",
110-
"@hms-dbmi-bgm/shared-portal-components": "git+https:github.com/4dn-dcic/shared-portal-components#0.1.63",
110+
"@hms-dbmi-bgm/shared-portal-components": "git+https:github.com/4dn-dcic/shared-portal-components#0.1.64",
111111
"auth0-lock": "^11.33.1",
112112
"d3": "^7.5.0",
113113
"date-fns": "^2.28.0",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[tool.poetry]
22
# Note: Various modules refer to this system as "encoded", not "fourfront".
33
name = "encoded"
4-
version = "5.3.7"
4+
version = "5.3.8"
55
description = "4DN-DCIC Fourfront"
66
authors = ["4DN-DCIC Team <[email protected]>"]
77
license = "MIT"

src/encoded/schemas/experiment_set.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@
151151
"description" : "The categorization of experiments within these Experiment Sets"
152152
},
153153
"experiments_in_set.experiment_type.display_title": {
154-
"title": "Experiment Type"
154+
"title": "Experiment Type",
155+
"group_by_field": "experiments_in_set.experiment_type.assay_subclass_short",
156+
"tooltip_term_substitue": "experiment type"
155157
},
156158
"dataset_label" : {
157159
"title" : "Dataset"

src/encoded/schemas/experiment_type.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,16 @@
253253
"title": "Experiment Category"
254254
},
255255
"assay_subclass_short": {
256-
"title": "Assay Type"
256+
"title": "Experiment Type Grouping"
257257
},
258258
"other_tags": {
259259
"title": "Additional Categories"
260260
}
261261
},
262262
"columns": {
263+
"assay_subclass_short": {
264+
"title": "Experiment Type Grouping"
265+
},
263266
"experiment_category": {
264267
"title": "Experiment Category"
265268
},

src/encoded/search.py

Lines changed: 109 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,15 @@ def search(context, request, search_type=None, return_generator=False, forced_ty
145145
# TODO: implement BOOST here?
146146

147147
### Set filters
148-
search, query_filters = set_filters(request, search, result, principals, doc_types)
148+
search, query_filters, base_field_filters = set_filters(request, search, result, principals, doc_types)
149149

150150
### Set starting facets
151151
additional_facets = request.normalized_params.getall('additional_facet')
152152
facets = initialize_facets(request, doc_types, prepared_terms, schemas, additional_facets)
153153

154154
### Adding facets, plus any optional custom aggregations.
155155
### Uses 'size' and 'from_' to conditionally skip (no facets if from > 0; no aggs if size > 0).
156-
search = set_facets(search, facets, query_filters, string_query, request, doc_types, custom_aggregations, size, from_)
156+
search = set_facets(search, facets, query_filters, string_query, request, doc_types, custom_aggregations, base_field_filters, size, from_)
157157

158158
### Add preference from session, if available
159159
search_session_id = None
@@ -174,7 +174,7 @@ def search(context, request, search_type=None, return_generator=False, forced_ty
174174

175175
### Record total number of hits
176176
result['total'] = total = es_results['hits']['total']['value']
177-
result['facets'] = format_facets(es_results, facets, total, additional_facets, search_frame)
177+
result['facets'] = format_facets(es_results, facets, total, additional_facets, result['filters'], search_frame)
178178
result['aggregations'] = format_extra_aggregations(es_results)
179179

180180
# After ES7 upgrade, 'total' does not return the exact count if it is >10000. This restriction
@@ -798,6 +798,10 @@ def set_filters(request, search, result, principals, doc_types):
798798
if 'OntologyTerm' not in doc_types:
799799
field_filters['[email protected]']['must_not_terms'].append('OntologyTerm')
800800

801+
# base filters include only the principals and doc_type restrictions, plus the exclusion of some statuses and item types
802+
# they are used as the filter for the group-by facet terms aggregation
803+
base_field_filters = create_field_filters(deepcopy(field_filters))
804+
801805
for field, term in request.normalized_params.items():
802806
not_field = False # keep track if query is NOT (!)
803807
exists_field = False # keep track of null values
@@ -935,7 +939,7 @@ def set_filters(request, search, result, principals, doc_types):
935939
prev_search['query']['bool']['filter'] = final_filters
936940
search.update_from_dict(prev_search)
937941

938-
return search, final_filters
942+
return search, final_filters, base_field_filters
939943

940944

941945
def initialize_additional_facets(request, doc_types, additional_facets, append_facets, current_type_schema):
@@ -1052,6 +1056,7 @@ def initialize_facets(request, doc_types, prepared_terms, schemas, additional_fa
10521056
# TODO: instead of workaround, '!' could be excluded while generating query results
10531057
if title_field.endswith('!'):
10541058
title_field = title_field[:-1]
1059+
use_field = use_field[:-1]
10551060

10561061
# if searching for a display_title, use the title of parent object
10571062
# use `is_object_title` to keep track of this
@@ -1061,7 +1066,7 @@ def initialize_facets(request, doc_types, prepared_terms, schemas, additional_fa
10611066
else:
10621067
is_object_title = False
10631068

1064-
if title_field in used_facets or title_field in disabled_facets:
1069+
if title_field in used_facets or title_field in disabled_facets or use_field in used_facets:
10651070
# Cancel if already in facets or is disabled
10661071
continue
10671072
used_facets.append(title_field)
@@ -1244,7 +1249,7 @@ def generate_filters_for_terms_agg_from_search_filters(query_field, search_filte
12441249
return facet_filters
12451250

12461251

1247-
def set_facets(search, facets, search_filters, string_query, request, doc_types, custom_aggregations=None, size=25, from_=0):
1252+
def set_facets(search, facets, search_filters, string_query, request, doc_types, custom_aggregations=None, base_field_filters=None, size=25, from_=0):
12481253
"""
12491254
Sets facets in the query as ElasticSearch aggregations, with each aggregation to be
12501255
filtered by search_filters minus filter affecting facet field in order to get counts
@@ -1322,6 +1327,7 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,
13221327
else:
13231328

13241329
facet['aggregation_type'] = 'terms'
1330+
13251331
term_aggregation = {
13261332
"terms" : {
13271333
'size' : 100, # Maximum terms returned (default=10); see https://github.com/10up/ElasticPress/wiki/Working-with-Aggregations
@@ -1337,6 +1343,32 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,
13371343
'filter': {'bool': facet_filters},
13381344
}
13391345

1346+
# add extra ES sub-query to fetch facet terms and their grouping terms to build
1347+
# parent - child hierarchy (we always build full map, since the implementation in
1348+
# https://github.com/4dn-dcic/fourfront/blob/dc47659487aec88fb0c19145e48ebbd20588eba3/src/encoded/search.py
1349+
# fails when there are selected terms in filters but not listed in facets)
1350+
# Note: This aggregation is used in group_facet_terms func.
1351+
if 'group_by_field' in facet and base_field_filters:
1352+
aggs[facet['aggregation_type'] + ":" + agg_name + ":group_by"] = {
1353+
'aggs': {
1354+
"primary_agg" : {
1355+
"terms" : {
1356+
'size' : 100,
1357+
'field' : "embedded." + facet['group_by_field'] + ".raw",
1358+
'missing' : facet.get("missing_value_replacement", "No value"),
1359+
'aggs': {
1360+
"sub_terms" : {
1361+
"terms" : {
1362+
"field": query_field,
1363+
}
1364+
}
1365+
}
1366+
}
1367+
}
1368+
},
1369+
'filter': {'bool': deepcopy(base_field_filters['bool'])},
1370+
}
1371+
13401372
# Update facet with title, description from field_schema, if missing.
13411373
if facet.get('title') is None and field_schema and 'title' in field_schema:
13421374
facet['title'] = field_schema['title']
@@ -1423,7 +1455,7 @@ def execute_search(search):
14231455
return es_results
14241456

14251457

1426-
def format_facets(es_results, facets, total, additional_facets, search_frame='embedded'):
1458+
def format_facets(es_results, facets, total, additional_facets, filters, search_frame='embedded'):
14271459
"""
14281460
Format the facets for the final results based on the es results.
14291461
Sort based off of the 'order' of the facets
@@ -1486,6 +1518,10 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
14861518
else:
14871519
# Default - terms, range, or histogram buckets. Buckets may not be present
14881520
result_facet['terms'] = aggregations[full_agg_name]["primary_agg"]["buckets"]
1521+
1522+
if 'group_by_field' in result_facet and (full_agg_name + ':group_by') in aggregations:
1523+
group_facet_terms(result_facet, aggregations[full_agg_name + ':group_by'], filters)
1524+
14891525
# Choosing to show facets with one term for summary info on search it provides
14901526
if len(result_facet.get('terms', [])) < 1:
14911527
continue
@@ -1497,6 +1533,56 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
14971533

14981534
return result
14991535

1536+
1537+
def group_facet_terms(result_facet, agg, filters):
    """
    Regroup the flat terms of a facet under their grouping terms, in place.

    On return, ``result_facet['terms']`` is a list of group entries shaped like
    ``{'key': <group name>, 'doc_count': <sum of child counts>, 'terms': [...]}``,
    sorted by ``doc_count`` in descending order. The ``group_by_field`` key is
    removed from ``result_facet`` and ``has_group_by`` is set to True.

    :param result_facet: facet dict providing ``terms`` (list of dicts with
        ``key`` and ``doc_count``) and ``field``; mutated in place.
    :param agg: the group-by aggregation result; its
        ``primary_agg.buckets[*].sub_terms.buckets`` entries supply the
        group -> terms relation.
    :param filters: iterable of dicts with ``field`` and ``term`` describing the
        currently applied filters; ``None`` is treated as no filters.
    :returns: None. Does nothing if ``result_facet`` or ``agg`` is None.
    """
    if result_facet is None or agg is None:
        return

    missing_group = '(Missing group)'

    # Map each term (as a string) to the first group that lists it, so the
    # group of any term can be found with a single lookup.
    group_of_term = {}
    for bucket in agg["primary_agg"]["buckets"]:
        for item in bucket['sub_terms']['buckets']:
            group_of_term.setdefault(str(item['key']), bucket['key'])

    grouped = {}

    def group_entry_for(term_key):
        # Keys are compared as strings because the lookup above is built from
        # stringified sub-term keys.
        group_key = group_of_term.get(str(term_key), missing_group)
        if group_key not in grouped:
            grouped[group_key] = {'key': group_key, 'doc_count': 0, 'terms': []}
        return grouped[group_key]

    seen_terms = set()
    for term in result_facet['terms']:
        entry = group_entry_for(term['key'])
        # the group's doc_count is the total of its child terms
        entry['doc_count'] += term['doc_count']
        entry['terms'].append(term)
        seen_terms.add(str(term['key']))

    # Add terms that are selected in filters but absent from the results.
    # (The UI handles this for regular facets, but it cannot build the
    # parent-child relation for grouped facet terms.)
    for applied_filter in filters or []:
        if applied_filter['field'] != result_facet['field']:
            continue
        term_key = applied_filter['term']
        if str(term_key) in seen_terms:
            continue
        group_entry_for(term_key)['terms'].append({'key': term_key, 'doc_count': 0})
        # remember it so a repeated filter does not add a duplicate term
        seen_terms.add(str(term_key))

    result_facet['terms'] = sorted(grouped.values(), key=lambda group: group['doc_count'], reverse=True)
    # pop() instead of del: tolerate a facet that has no 'group_by_field' key
    result_facet.pop('group_by_field', None)
    result_facet['has_group_by'] = True
1584+
1585+
15001586
def format_extra_aggregations(es_results):
15011587
if 'aggregations' not in es_results:
15021588
return {}
@@ -1586,6 +1672,7 @@ def get_iterable_search_results(request, search_path='/search/', param_lists=Non
15861672
subreq = make_search_subreq(request, '{}?{}'.format(search_path, urlencode(param_lists, True)) )
15871673
return iter_search_results(None, subreq, **kwargs)
15881674

1675+
15891676
def get_total_from_facets(facets, total):
15901677
'''
15911678
Loops through facets, and grabs total count from the term having type=Item
@@ -1599,10 +1686,25 @@ def get_total_from_facets(facets, total):
15991686
#fallback
16001687
return total
16011688

1689+
1690+
def create_field_filters(field_filters):
    """
    Build an Elasticsearch-style bool filter from per-field term lists.

    :param field_filters: dict mapping a query field to a dict holding
        ``must_terms`` and ``must_not_terms`` lists.
    :returns: ``{'bool': {'must': [...], 'must_not': [...]}}`` with one
        ``{'terms': {field: values}}`` clause per non-empty list. An empty
        ``field_filters`` yields two empty clause lists.
    """
    must_filters = []
    must_not_filters = []
    for query_field, filters in field_filters.items():
        # only emit a clause when there is at least one term to match
        if filters['must_terms']:
            must_filters.append({'terms': {query_field: filters['must_terms']}})
        if filters['must_not_terms']:
            must_not_filters.append({'terms': {query_field: filters['must_not_terms']}})

    # Built after the loop so the result is defined even when there are no fields.
    return {'bool': {'must': must_filters, 'must_not': must_not_filters}}
1702+
16021703
# Update? used in ./batch_download.py
16031704
def iter_search_results(context, request, **kwargs):
    """
    Call ``search`` with ``return_generator=True`` and return its result.

    Any extra keyword arguments are forwarded to ``search`` unchanged.
    """
    results = search(context, request, return_generator=True, **kwargs)
    return results
16051706

1707+
16061708
def build_table_columns(request, schemas, doc_types):
16071709

16081710
any_abstract_types = 'Item' in doc_types

0 commit comments

Comments
 (0)