Skip to content

Commit f951cbb

Browse files
zazass8 and rasbt authored
Fixing of association rule latest version (#1121)
* Updated FPGrowth/FPMax and Association Rules with the existence of missing values
* Re-structure and document code
* Update unit tests
* Update CHANGELOG.md
* Modify the corresponding documentation in Jupyter notebooks
* Final modifications
* Fix association rules and corresponding tests
* Fix typos
* Fixing memory usage increase
* Fixing memory usage increase

---------

Co-authored-by: Sebastian Raschka <[email protected]>
1 parent 71f2531 commit f951cbb

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed

docs/sources/user_guide/frequent_patterns/association_rules.ipynb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,13 +2418,16 @@
24182418
},
24192419
{
24202420
"cell_type": "code",
2421+
24212422
"execution_count": 20,
2423+
24222424
"metadata": {},
24232425
"outputs": [
24242426
{
24252427
"name": "stderr",
24262428
"output_type": "stream",
24272429
"text": [
2430+
24282431
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
24292432
" df.iloc[idx[i], col[i]] = np.nan\n",
24302433
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
@@ -2438,6 +2441,7 @@
24382441
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
24392442
" df.iloc[idx[i], col[i]] = np.nan\n",
24402443
"/tmp/ipykernel_34953/2823279667.py:23: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'nan' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.\n",
2444+
24412445
" df.iloc[idx[i], col[i]] = np.nan\n"
24422446
]
24432447
},
@@ -2489,6 +2493,7 @@
24892493
" <td>True</td>\n",
24902494
" <td>False</td>\n",
24912495
" <td>NaN</td>\n",
2496+
24922497
" </tr>\n",
24932498
" <tr>\n",
24942499
" <th>1</th>\n",
@@ -2710,6 +2715,7 @@
27102715
]
27112716
},
27122717
"execution_count": 21,
2718+
27132719
"metadata": {},
27142720
"output_type": "execute_result"
27152721
}
@@ -2718,6 +2724,7 @@
27182724
"frequent_itemsets = fpgrowth(df, min_support=0.6, null_values = True, use_colnames=True)\n",
27192725
"# frequent_itemsets = fpmax(df, min_support=0.6, null_values = True, use_colnames=True)\n",
27202726
"rules = association_rules(frequent_itemsets, len(df), df, null_values = True, metric=\"confidence\", min_threshold=0.8)\n",
2727+
27212728
"rules"
27222729
]
27232730
},

mlxtend/frequent_patterns/association_rules.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
def association_rules(
3636
df: pd.DataFrame,
37-
num_itemsets: int,
37+
num_itemsets: Optional[int] = 1,
3838
df_orig: Optional[pd.DataFrame] = None,
3939
null_values=False,
4040
metric="confidence",
@@ -54,8 +54,8 @@ def association_rules(
5454
df_orig : pandas DataFrame (default: None)
5555
DataFrame with original input data. Only provided when null_values exist
5656
57-
num_itemsets : int
58-
Number of transactions in original input data
57+
num_itemsets : int (default: 1)
58+
Number of transactions in original input data (df_orig)
5959
6060
null_values : bool (default: False)
6161
In case there are null values as NaNs in the original input data
@@ -119,6 +119,10 @@ def association_rules(
119119
if null_values and df_orig is None:
120120
raise TypeError("If null values exist, df_orig must be provided.")
121121

122+
# if null values exist, num_itemsets must be provided
123+
if null_values and num_itemsets == 1:
124+
raise TypeError("If null values exist, num_itemsets must be provided.")
125+
122126
# check for valid input
123127
fpc.valid_input_check(df_orig, null_values)
124128

@@ -285,7 +289,6 @@ def certainty_metric_helper(sAC, sA, sC, disAC, disA, disC, dis_int, dis_int_):
285289
# if the input dataframe is complete
286290
if not null_values:
287291
disAC, disA, disC, dis_int, dis_int_ = 0, 0, 0, 0, 0
288-
num_itemsets = 1
289292

290293
else:
291294
an = list(antecedent)

mlxtend/frequent_patterns/fpcommon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def setup_fptree(df, min_support):
3131
)
3232

3333
item_support = np.array(
34-
np.sum(np.logical_or(df.values == 1, df.values is True), axis=0)
34+
np.nansum(df.values, axis=0)
3535
/ (float(num_itemsets) - np.nansum(disabled, axis=0))
3636
)
3737
item_support = item_support.reshape(-1)

0 commit comments

Comments
 (0)