Skip to content

Commit 2e3adc3

Browse files
Improve DMatrix creation performance in python
The xgboost Python package serializes numpy arrays as JSON. This can take up a considerable amount of time in production workloads. This patch optimizes the specific case where the numpy array is already in C-contiguous 32-bit floating-point format and can be loaded directly, without the JSON layer. This can improve performance by up to 35% in some cases, as can be seen in the microbenchmark added in xgboost/tests/python/microbench_numpy.py:

Rows     | Cols     | Threads      | Contiguous      | Non-contiguous  | Ratio
---------+----------+--------------+-----------------+-----------------+--------------
15000    | 100      | 0            | 0.01686         | 0.01988         | 84.8%
15000    | 100      | 1            | 0.02897         | 0.04424         | 65.5%
15000    | 100      | 2            | 0.02579         | 0.0392          | 65.8%
15000    | 100      | 10           | 0.01581         | 0.02058         | 76.8%
---------+----------+--------------+-----------------+-----------------+--------------
2        | 2000     | 0            | 0.001055        | 0.001205        | 87.6%
2        | 2000     | 1            | 0.0004465       | 0.0005689       | 78.5%
2        | 2000     | 2            | 0.0004609       | 0.000615        | 74.9%
2        | 2000     | 10           | 0.0005087       | 0.0005623       | 90.5%
---------+----------+--------------+-----------------+-----------------+--------------
1 parent 0c44067 commit 2e3adc3

File tree

3 files changed

+130
-10
lines changed

3 files changed

+130
-10
lines changed

python-package/xgboost/data.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -252,17 +252,30 @@ def _from_numpy_array(
252252
_check_data_shape(data)
253253
data, _ = _ensure_np_dtype(data, data.dtype)
254254
handle = ctypes.c_void_p()
255-
_check_call(
256-
_LIB.XGDMatrixCreateFromDense(
257-
_array_interface(data),
258-
make_jcargs(
259-
missing=float(missing),
260-
nthread=int(nthread),
261-
data_split_mode=int(data_split_mode),
262-
),
263-
ctypes.byref(handle),
255+
if isinstance(data, np.ndarray) and data.flags['C_CONTIGUOUS'] and data.dtype == np.float32:
256+
_check_call(
257+
_LIB.XGDMatrixCreateFromMat_omp(
258+
data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
259+
c_bst_ulong(data.shape[0]),
260+
c_bst_ulong(data.shape[1]),
261+
ctypes.c_float(missing),
262+
ctypes.byref(handle),
263+
ctypes.c_int(nthread),
264+
ctypes.c_int(data_split_mode),
265+
)
266+
)
267+
else:
268+
_check_call(
269+
_LIB.XGDMatrixCreateFromDense(
270+
_array_interface(data),
271+
make_jcargs(
272+
missing=float(missing),
273+
nthread=int(nthread),
274+
data_split_mode=int(data_split_mode),
275+
),
276+
ctypes.byref(handle),
277+
)
264278
)
265-
)
266279
return handle, feature_names, feature_types
267280

268281

tests/python/microbench_numpy.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import numpy as np
2+
import xgboost as xgb
3+
from collections import defaultdict
4+
import timeit
5+
6+
7+
def create_dmatrix(X, y, test_contiguous, nthread):
    """Build an ``xgb.DMatrix`` from ``X`` and ``y`` after forcing a memory layout.

    Parameters
    ----------
    X, y :
        Feature matrix and labels; both are passed through the same conversion.
    test_contiguous :
        If true, both arrays are made C-ordered and cast to ``np.float32``.
        Otherwise both are converted to Fortran order and their dtype is left
        as it was.
    nthread :
        Forwarded unchanged to ``xgb.DMatrix``.

    Returns
    -------
    The constructed ``xgb.DMatrix``.
    """
    if not test_contiguous:
        X, y = np.asfortranarray(X), np.asfortranarray(y)
        # Guard against inputs for which Fortran order is also C order.
        assert not X.flags["C_CONTIGUOUS"]
    else:
        X, y = (
            np.ascontiguousarray(X).astype(np.float32),
            np.ascontiguousarray(y).astype(np.float32),
        )
        assert X.flags["C_CONTIGUOUS"]

    return xgb.DMatrix(X, y, nthread=nthread)
19+
20+
21+
def benchmark_dmatrix_creation(test_contiguous, nthread, rows, cols):
    """Time a single ``create_dmatrix`` call on freshly generated random data.

    A ``rows`` x ``cols`` feature matrix and a float32 label vector of length
    ``rows`` are drawn with ``np.random.randn``.  Only the ``create_dmatrix``
    call is timed; note that this call also performs the layout/dtype
    conversion selected by ``test_contiguous``, so that conversion is part of
    the measurement.  The resulting DMatrix is validated after the clock stops.

    Returns
    -------
    float
        Difference between two ``timeit.default_timer()`` readings taken
        immediately before and after the ``create_dmatrix`` call.
    """
    features = np.random.randn(rows, cols)
    labels = np.random.randn(rows).astype(np.float32)

    started = timeit.default_timer()
    dmatrix = create_dmatrix(features, labels, test_contiguous, nthread)
    finished = timeit.default_timer()

    # Validation happens outside the timed region.
    np.testing.assert_allclose(dmatrix.get_data().toarray(), features, rtol=1e-7)
    np.testing.assert_array_equal(dmatrix.get_label(), labels)
    assert dmatrix.num_row() == rows
    assert dmatrix.num_col() == cols

    return finished - started
39+
40+
41+
# Not referenced by the loop below: every configuration carries its own
# repeat count in the tuple it is unpacked from.
REPEATS = 10

print(
    f"{'Rows':8} | {'Cols':8} | {'Threads':12} | {'Contiguous':15} | {'Non-contiguous':15} | {'Ratio':12}"
)

# Each entry is (rows, cols, repeats).
for rows, cols, repeats in ((15000, 100, 10), (2, 2000, 200)):
    # Start from empty accumulators for every configuration.  Creating them
    # once before this loop would carry the previous configuration's averaged
    # timings into the next configuration's totals.
    contiguous = defaultdict(float)
    noncontiguous = defaultdict(float)

    for nthread in (0, 1, 2, 10):
        for _ in range(repeats):
            contiguous[nthread] += benchmark_dmatrix_creation(
                test_contiguous=True, nthread=nthread, rows=rows, cols=cols
            )
            noncontiguous[nthread] += benchmark_dmatrix_creation(
                test_contiguous=False, nthread=nthread, rows=rows, cols=cols
            )

    # Turn the per-thread totals into per-call means.
    contiguous = {k: v / repeats for k, v in contiguous.items()}
    noncontiguous = {k: v / repeats for k, v in noncontiguous.items()}

    # Ratio column is mean contiguous time divided by mean non-contiguous time.
    for k in contiguous:
        print(
            f"{rows:8} | {cols:8} | {k:12} | {contiguous[k]:15.4g} | {noncontiguous[k]:15.4g} | {contiguous[k] / noncontiguous[k]:12.1%}"
        )

tests/python/test_basic.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,47 @@ def test_dmatrix_numpy_init_omp(self):
210210
assert dm.num_row() == row
211211
assert dm.num_col() == cols
212212

213+
def _test_dmatrix_numpy_init_omp_contiguous(self, test_contiguous: bool):
    """Check DMatrix construction from numpy input in a chosen memory layout.

    For several row counts, random data is generated and converted either to
    C-ordered float32 (``test_contiguous=True``) or to Fortran order with the
    dtype left unchanged (``test_contiguous=False``).  A DMatrix is then built
    with ``nthread`` set to 0, 1 and 10 in turn, and its data, labels and
    dimensions are compared against the inputs.
    """
    n_cols = 50
    for n_rows in (1000, 11326, 15000):
        X = np.random.randn(n_rows, n_cols)
        y = np.random.randn(n_rows).astype("f")

        # Force the memory layout under test and verify it took effect.
        if test_contiguous:
            X = np.ascontiguousarray(X).astype(np.float32)
            y = np.ascontiguousarray(y).astype(np.float32)
            assert X.flags['C_CONTIGUOUS']
        else:
            X = np.asfortranarray(X)
            y = np.asfortranarray(y)
            assert not X.flags['C_CONTIGUOUS']

        # The same checks must hold for every thread setting.
        for nthread in (0, 1, 10):
            dm = xgb.DMatrix(X, y, nthread=nthread)
            np.testing.assert_allclose(dm.get_data().toarray(), X, rtol=1e-7)
            np.testing.assert_array_equal(dm.get_label(), y)
            assert dm.num_row() == n_rows
            assert dm.num_col() == n_cols
247+
248+
def test_dmatrix_numpy_init_omp_contiguous(self):
    """Run the shared DMatrix construction checks with ``test_contiguous=True``."""
    return self._test_dmatrix_numpy_init_omp_contiguous(test_contiguous=True)
250+
251+
def test_dmatrix_numpy_init_omp_not_contiguous(self):
    """Run the shared DMatrix construction checks with ``test_contiguous=False``."""
    return self._test_dmatrix_numpy_init_omp_contiguous(test_contiguous=False)
253+
213254
def test_cv(self):
214255
dm, _ = tm.load_agaricus(__file__)
215256
params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}

0 commit comments

Comments
 (0)