@@ -145,15 +145,15 @@ def search(context, request, search_type=None, return_generator=False, forced_ty
145
145
# TODO: implement BOOST here?
146
146
147
147
### Set filters
148
- search , query_filters = set_filters (request , search , result , principals , doc_types )
148
+ search , query_filters , base_field_filters = set_filters (request , search , result , principals , doc_types )
149
149
150
150
### Set starting facets
151
151
additional_facets = request .normalized_params .getall ('additional_facet' )
152
152
facets = initialize_facets (request , doc_types , prepared_terms , schemas , additional_facets )
153
153
154
154
### Adding facets, plus any optional custom aggregations.
155
155
### Uses 'size' and 'from_' to conditionally skip (no facets if from > 0; no aggs if size > 0).
156
- search = set_facets (search , facets , query_filters , string_query , request , doc_types , custom_aggregations , size , from_ )
156
+ search = set_facets (search , facets , query_filters , string_query , request , doc_types , custom_aggregations , base_field_filters , size , from_ )
157
157
158
158
### Add preference from session, if available
159
159
search_session_id = None
@@ -174,7 +174,7 @@ def search(context, request, search_type=None, return_generator=False, forced_ty
174
174
175
175
### Record total number of hits
176
176
result ['total' ] = total = es_results ['hits' ]['total' ]['value' ]
177
- result ['facets' ] = format_facets (es_results , facets , total , additional_facets , search_frame )
177
+ result ['facets' ] = format_facets (es_results , facets , total , additional_facets , result [ 'filters' ], search_frame )
178
178
result ['aggregations' ] = format_extra_aggregations (es_results )
179
179
180
180
# After ES7 upgrade, 'total' does not return the exact count if it is >10000. This restriction
@@ -798,6 +798,10 @@ def set_filters(request, search, result, principals, doc_types):
798
798
if 'OntologyTerm' not in doc_types :
799
799
field_filters [
'[email protected] ' ][
'must_not_terms' ].
append (
'OntologyTerm' )
800
800
801
+ # base filters only includes principals, doc_type and excludes some status and item types
802
+ # it is essentially useful for the group by facet terms aggregation
803
+ base_field_filters = create_field_filters (deepcopy (field_filters ))
804
+
801
805
for field , term in request .normalized_params .items ():
802
806
not_field = False # keep track if query is NOT (!)
803
807
exists_field = False # keep track of null values
@@ -935,7 +939,7 @@ def set_filters(request, search, result, principals, doc_types):
935
939
prev_search ['query' ]['bool' ]['filter' ] = final_filters
936
940
search .update_from_dict (prev_search )
937
941
938
- return search , final_filters
942
+ return search , final_filters , base_field_filters
939
943
940
944
941
945
def initialize_additional_facets (request , doc_types , additional_facets , append_facets , current_type_schema ):
@@ -1052,6 +1056,7 @@ def initialize_facets(request, doc_types, prepared_terms, schemas, additional_fa
1052
1056
# TODO: instead of workaround, '!' could be excluded while generating query results
1053
1057
if title_field .endswith ('!' ):
1054
1058
title_field = title_field [:- 1 ]
1059
+ use_field = use_field [:- 1 ]
1055
1060
1056
1061
# if searching for a display_title, use the title of parent object
1057
1062
# use `is_object_title` to keep track of this
@@ -1061,7 +1066,7 @@ def initialize_facets(request, doc_types, prepared_terms, schemas, additional_fa
1061
1066
else :
1062
1067
is_object_title = False
1063
1068
1064
- if title_field in used_facets or title_field in disabled_facets :
1069
+ if title_field in used_facets or title_field in disabled_facets or use_field in used_facets :
1065
1070
# Cancel if already in facets or is disabled
1066
1071
continue
1067
1072
used_facets .append (title_field )
@@ -1244,7 +1249,7 @@ def generate_filters_for_terms_agg_from_search_filters(query_field, search_filte
1244
1249
return facet_filters
1245
1250
1246
1251
1247
- def set_facets (search , facets , search_filters , string_query , request , doc_types , custom_aggregations = None , size = 25 , from_ = 0 ):
1252
+ def set_facets (search , facets , search_filters , string_query , request , doc_types , custom_aggregations = None , base_field_filters = None , size = 25 , from_ = 0 ):
1248
1253
"""
1249
1254
Sets facets in the query as ElasticSearch aggregations, with each aggregation to be
1250
1255
filtered by search_filters minus filter affecting facet field in order to get counts
@@ -1322,6 +1327,7 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,
1322
1327
else :
1323
1328
1324
1329
facet ['aggregation_type' ] = 'terms'
1330
+
1325
1331
term_aggregation = {
1326
1332
"terms" : {
1327
1333
'size' : 100 , # Maximum terms returned (default=10); see https://github.com/10up/ElasticPress/wiki/Working-with-Aggregations
@@ -1337,6 +1343,32 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,
1337
1343
'filter' : {'bool' : facet_filters },
1338
1344
}
1339
1345
1346
+ # add extra ES sub-query to fetch facet terms and their grouping terms to build
1347
+ # parent - child hierarchy (we always build full map, since the implementation in
1348
+ # https://github.com/4dn-dcic/fourfront/blob/dc47659487aec88fb0c19145e48ebbd20588eba3/src/encoded/search.py
1349
+ # fails when there are selected terms in filters but not listed in facets)
1350
+ # Note: This aggregation is used in group_facet_terms func.
1351
+ if 'group_by_field' in facet and base_field_filters :
1352
+ aggs [facet ['aggregation_type' ] + ":" + agg_name + ":group_by" ] = {
1353
+ 'aggs' : {
1354
+ "primary_agg" : {
1355
+ "terms" : {
1356
+ 'size' : 100 ,
1357
+ 'field' : "embedded." + facet ['group_by_field' ] + ".raw" ,
1358
+ 'missing' : facet .get ("missing_value_replacement" , "No value" ),
1359
+ 'aggs' : {
1360
+ "sub_terms" : {
1361
+ "terms" : {
1362
+ "field" : query_field ,
1363
+ }
1364
+ }
1365
+ }
1366
+ }
1367
+ }
1368
+ },
1369
+ 'filter' : {'bool' : deepcopy (base_field_filters ['bool' ])},
1370
+ }
1371
+
1340
1372
# Update facet with title, description from field_schema, if missing.
1341
1373
if facet .get ('title' ) is None and field_schema and 'title' in field_schema :
1342
1374
facet ['title' ] = field_schema ['title' ]
@@ -1423,7 +1455,7 @@ def execute_search(search):
1423
1455
return es_results
1424
1456
1425
1457
1426
- def format_facets (es_results , facets , total , additional_facets , search_frame = 'embedded' ):
1458
+ def format_facets (es_results , facets , total , additional_facets , filters , search_frame = 'embedded' ):
1427
1459
"""
1428
1460
Format the facets for the final results based on the es results.
1429
1461
Sort based off of the 'order' of the facets
@@ -1486,6 +1518,10 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
1486
1518
else :
1487
1519
# Default - terms, range, or histogram buckets. Buckets may not be present
1488
1520
result_facet ['terms' ] = aggregations [full_agg_name ]["primary_agg" ]["buckets" ]
1521
+
1522
+ if 'group_by_field' in result_facet and (full_agg_name + ':group_by' ) in aggregations :
1523
+ group_facet_terms (result_facet , aggregations [full_agg_name + ':group_by' ], filters )
1524
+
1489
1525
# Choosing to show facets with one term for summary info on search it provides
1490
1526
if len (result_facet .get ('terms' , [])) < 1 :
1491
1527
continue
@@ -1497,6 +1533,56 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
1497
1533
1498
1534
return result
1499
1535
1536
+
1537
def group_facet_terms(result_facet, agg, filters):
    """
    Regroup the flat term buckets of a facet under their parent groups, in place.

    On return, ``result_facet['terms']`` is a list of group entries shaped like
    ``{'key': <group>, 'doc_count': <sum of child counts>, 'terms': [<child terms>]}``
    sorted by ``doc_count`` descending, ``'group_by_field'`` is removed and
    ``'has_group_by'`` is set to True.

    :param result_facet: facet dict; reads 'terms' (list of {'key', 'doc_count'}) and 'field'.
    :param agg: aggregation result exposing ``agg['primary_agg']['buckets']``, where each
                bucket has a 'key' (the group) and ``['sub_terms']['buckets']`` (its terms).
    :param filters: iterable of applied filters, each with 'field' and 'term' (may be None).
    :returns: None. Does nothing if ``result_facet`` or ``agg`` is None.
    """
    if result_facet is None or agg is None:
        return

    missing_group = '(Missing group)'

    # Map each term key to the FIRST group bucket that lists it. Keys are normalised
    # to str on both the build side and the lookup side so that non-string bucket
    # keys (numbers, booleans) still resolve to their group.
    group_by_term = {}
    for bucket in agg["primary_agg"]["buckets"]:
        for item in bucket['sub_terms']['buckets']:
            group_by_term.setdefault(str(item['key']), bucket['key'])

    groups = {}  # group key -> group entry; insertion order kept for stable sorting
    seen_terms = set()  # str(term key) of every child term already placed in a group

    def group_for(term_key):
        # Return (creating on first use) the group entry a term belongs to.
        group_key = group_by_term.get(str(term_key), missing_group)
        group = groups.get(group_key)
        if group is None:
            group = groups[group_key] = {'key': group_key, 'doc_count': 0, 'terms': []}
        return group

    for term in result_facet.get('terms') or []:
        group = group_for(term['key'])
        # calculate total doc_count
        group['doc_count'] += term['doc_count']
        group['terms'].append(term)
        seen_terms.add(str(term['key']))

    # Add terms that are selected in filters but absent from the results, with a zero
    # count (the UI handles this for regular facets, but cannot rebuild the
    # parent-child relation for grouped facet terms on its own).
    facet_field = result_facet.get('field')
    for applied in filters or []:
        term_key = applied['term']
        if applied['field'] != facet_field or str(term_key) in seen_terms:
            continue
        group_for(term_key)['terms'].append({'key': term_key, 'doc_count': 0})
        # Remember it so a repeated filter does not produce a duplicate child term.
        seen_terms.add(str(term_key))

    result_facet['terms'] = sorted(groups.values(), key=lambda g: g['doc_count'], reverse=True)
    result_facet.pop('group_by_field', None)
    result_facet['has_group_by'] = True
1584
+
1585
+
1500
1586
def format_extra_aggregations (es_results ):
1501
1587
if 'aggregations' not in es_results :
1502
1588
return {}
@@ -1586,6 +1672,7 @@ def get_iterable_search_results(request, search_path='/search/', param_lists=Non
1586
1672
subreq = make_search_subreq (request , '{}?{}' .format (search_path , urlencode (param_lists , True )) )
1587
1673
return iter_search_results (None , subreq , ** kwargs )
1588
1674
1675
+
1589
1676
def get_total_from_facets (facets , total ):
1590
1677
'''
1591
1678
Loops through facets, and grabs total count from the term having type=Item
@@ -1599,10 +1686,25 @@ def get_total_from_facets(facets, total):
1599
1686
#fallback
1600
1687
return total
1601
1688
1689
+
1690
def create_field_filters(field_filters):
    """
    Build an Elasticsearch ``bool`` filter from per-field term lists.

    :param field_filters: dict mapping a query field name to a dict holding
                          'must_terms' and 'must_not_terms' lists.
    :returns: ``{'bool': {'must': [...], 'must_not': [...]}}`` where each entry is a
              ``{'terms': {<field>: <list>}}`` clause. Fields whose list is empty
              contribute no clause; an empty ``field_filters`` yields empty lists.
    """
    must_filters = []
    must_not_filters = []

    for query_field, filters in field_filters.items():
        must_terms = filters.get('must_terms')
        if must_terms:
            must_filters.append({'terms': {query_field: must_terms}})

        must_not_terms = filters.get('must_not_terms')
        if must_not_terms:
            must_not_filters.append({'terms': {query_field: must_not_terms}})

    # Built once, after the loop, so the result is defined even when there are
    # no fields to iterate over.
    return {'bool': {'must': must_filters, 'must_not': must_not_filters}}
1702
+
1602
1703
# Update? used in ./batch_download.py
1603
1704
def iter_search_results(context, request, **kwargs):
    """
    Run `search` with ``return_generator=True`` and return whatever it returns.

    All extra keyword arguments are forwarded to `search` unchanged.
    """
    results_generator = search(context, request, return_generator=True, **kwargs)
    return results_generator
1605
1706
1707
+
1606
1708
def build_table_columns (request , schemas , doc_types ):
1607
1709
1608
1710
any_abstract_types = 'Item' in doc_types
0 commit comments