CofeehousePy/deps/numpy/benchmarks/benchmarks/bench_ufunc.py

210 lines
6.2 KiB
Python

from .common import Benchmark, get_squares_
import numpy as np
# Names of the NumPy ufuncs exercised by the parametrized benchmarks in this
# file, grouped here by leading letter.
ufuncs = [
    'abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
    'arctan', 'arctan2', 'arctanh',
    'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor',
    'cbrt', 'ceil', 'conj', 'conjugate', 'copysign', 'cos', 'cosh',
    'deg2rad', 'degrees', 'divide', 'divmod',
    'equal', 'exp', 'exp2', 'expm1',
    'fabs', 'float_power', 'floor', 'floor_divide', 'fmax', 'fmin', 'fmod',
    'frexp',
    'gcd', 'greater', 'greater_equal',
    'heaviside', 'hypot',
    'invert', 'isfinite', 'isinf', 'isnan', 'isnat',
    'lcm', 'ldexp', 'left_shift', 'less', 'less_equal', 'log', 'log10',
    'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_not',
    'logical_or', 'logical_xor',
    'matmul', 'maximum', 'minimum', 'mod', 'modf', 'multiply',
    'negative', 'nextafter', 'not_equal',
    'positive', 'power',
    'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift', 'rint',
    'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt', 'square', 'subtract',
    'tan', 'tanh', 'true_divide', 'trunc',
]

# Report every ufunc exposed on the numpy namespace that the list above does
# not mention, so the list can be kept in step with the installed NumPy.
for name in dir(np):
    if not isinstance(getattr(np, name, None), np.ufunc):
        continue
    if name in ufuncs:
        continue
    print("Missing ufunc %r" % (name,))
class Broadcast(Benchmark):
    """Time subtracting a 1-D float64 array from a 2-D float64 array."""

    def setup(self):
        # The length of `e` equals the trailing dimension of `d`, so the
        # subtraction in the timed method combines a (100,) operand with a
        # (50000, 100) one.
        n_rows, n_cols = 50000, 100
        self.e = np.ones((n_cols,), dtype=np.float64)
        self.d = np.ones((n_rows, n_cols), dtype=np.float64)

    def time_broadcast(self):
        self.d - self.e
class UFunc(Benchmark):
    """Time every ufunc named in ``ufuncs`` on each input array it accepts."""

    params = [ufuncs]
    param_names = ['ufunc']
    timeout = 10

    def setup(self, ufuncname):
        """Look up the ufunc and collect the argument tuples it can be called with.

        ``ufuncname`` is the attribute name to fetch from ``numpy``.

        Raises ``NotImplementedError`` when ``numpy`` has no attribute of
        that name (presumably so the benchmark runner skips the parameter;
        confirm against the asv documentation).
        """
        # Ignore all floating-point error conditions while probing/timing.
        np.seterr(all='ignore')
        try:
            self.f = getattr(np, ufuncname)
        except AttributeError:
            raise NotImplementedError()

        self.args = []
        # Only the arrays are used, so iterate the values directly instead of
        # unpacking a key that is never read.
        # NOTE(review): assumes get_squares_() returns a dict-like mapping.
        for a in get_squares_().values():
            # Feed the same array to every input slot of the ufunc.
            arg = (a,) * self.f.nin
            try:
                self.f(*arg)
            except TypeError:
                # The ufunc rejected this input; leave it out of the timing.
                continue
            self.args.append(arg)

    def time_ufunc_types(self, ufuncname):
        # Results are discarded; the list comprehension is kept as-is so the
        # measured work stays the same.
        [self.f(*arg) for arg in self.args]
class Custom(Benchmark):
    """Time ``nonzero`` and the ``~``, ``&``, ``|`` operators on a bool array."""

    def setup(self):
        # A boolean array of 20000 ones is the single operand for every
        # timed method below.
        n_items = 20000
        self.b = np.ones(n_items, dtype=bool)

    def time_nonzero(self):
        np.nonzero(self.b)

    def time_not_bool(self):
        ~self.b

    def time_and_bool(self):
        self.b & self.b

    def time_or_bool(self):
        self.b | self.b
class CustomInplace(Benchmark):
    """Time ufunc calls that write into ``out=`` next to operator expressions.

    Each dtype (int8, int32, float32, float64) has one method calling the
    ufunc with ``out=`` set to the input array and one ``_temp`` method
    spelling the same operation with plain operators.
    """

    def setup(self):
        self.c = np.ones(500000, dtype=np.int8)
        self.i = np.ones(150000, dtype=np.int32)
        self.f = np.zeros(150000, dtype=np.float32)
        self.d = np.zeros(75000, dtype=np.float64)
        # fault memory: write once to each zero-filled array
        for zero_filled in (self.f, self.d):
            zero_filled *= 1.

    def time_char_or(self):
        np.bitwise_or(self.c, 0, out=self.c)
        np.bitwise_or(0, self.c, out=self.c)

    def time_char_or_temp(self):
        0 | self.c | 0

    def time_int_or(self):
        np.bitwise_or(self.i, 0, out=self.i)
        np.bitwise_or(0, self.i, out=self.i)

    def time_int_or_temp(self):
        0 | self.i | 0

    def time_float_add(self):
        np.add(self.f, 1., out=self.f)
        np.add(1., self.f, out=self.f)

    def time_float_add_temp(self):
        1. + self.f + 1.

    def time_double_add(self):
        np.add(self.d, 1., out=self.d)
        np.add(1., self.d, out=self.d)

    def time_double_add_temp(self):
        1. + self.d + 1.
class CustomScalar(Benchmark):
    """Time array-with-Python-int operations for float32 and float64 arrays."""

    params = [np.float32, np.float64]
    param_names = ['dtype']

    def setup(self, dtype):
        # One array of 20000 ones in the parametrized dtype.
        n_items = 20000
        self.d = np.ones(n_items, dtype=dtype)

    def time_add_scalar2(self, dtype):
        np.add(self.d, 1)

    def time_divide_scalar2(self, dtype):
        np.divide(self.d, 1)

    def time_divide_scalar2_inplace(self, dtype):
        # Same division, but the result is written back into the input.
        np.divide(self.d, 1, out=self.d)

    def time_less_than_scalar2(self, dtype):
        self.d < 1
class Scalar(Benchmark):
    """Time ``+`` on arrays created from single scalar values."""

    def setup(self):
        # x and y come from np.asarray on one real and one complex scalar;
        # z is a plain Python complex number.
        self.z = complex(1.0, 1.0)
        self.y = np.asarray(1.0 + 1j)
        self.x = np.asarray(1.0)

    def time_add_scalar(self):
        self.x + self.x

    def time_add_scalar_conv(self):
        # The right operand is a Python float rather than an array.
        self.x + 1.0

    def time_add_scalar_conv_complex(self):
        # The right operand is a Python complex rather than an array.
        self.y + self.z
class ArgPack:
    """Bundle positional and keyword arguments so they can be replayed later.

    ``args`` holds the positional arguments as a tuple and ``kwargs`` the
    keyword arguments as a dict, exactly as received by the constructor.
    """

    __slots__ = ['args', 'kwargs']

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def __repr__(self):
        # Render like the argument list of a call: "(a, b, key=value)".
        rendered = [repr(positional) for positional in self.args]
        rendered.extend(
            '{}={!r}'.format(key, value)
            for key, value in self.kwargs.items()
        )
        return '({})'.format(', '.join(rendered))
class ArgParsing(Benchmark):
    """Time ``np.add`` called with different argument spellings.

    In order to benchmark the speed of argument parsing, all but the out
    arguments are chosen such that they have no effect on the calculation.
    In particular, subok=True and where=True are defaults, and the dtype is
    the correct one (the latter will still have some effect on the search
    for the correct inner loop).
    """

    # Operands and output array shared by every parameter set below.
    x = np.array(1.)
    y = np.array(2.)
    out = np.array(3.)

    param_names = ['arg_kwarg']
    params = [[
        # No output array.
        ArgPack(x, y),
        ArgPack(x, y, subok=True),
        ArgPack(x, y, subok=True, where=True),
        # Output array given positionally.
        ArgPack(x, y, out),
        ArgPack(x, y, out, subok=True, where=True),
        # Output array given by keyword.
        ArgPack(x, y, out=out),
        ArgPack(x, y, out=(out,)),
        ArgPack(x, y, out=out, subok=True, where=True),
    ]]

    def time_add_arg_parsing(self, arg_pack):
        np.add(*arg_pack.args, **arg_pack.kwargs)
class ArgParsingReduce(Benchmark):
    """Time ``np.add.reduce`` called with different argument spellings.

    In order to benchmark the speed of argument parsing, all but the out
    arguments are chosen such that they have minimal effect on the
    calculation.
    """

    # Two-element input and the output array shared by every parameter set.
    a = np.arange(2.)
    out = np.array(0.)

    param_names = ['arg_kwarg']
    params = [[
        ArgPack(a),
        ArgPack(a, 0),
        ArgPack(a, axis=0),
        ArgPack(a, 0, None),
        ArgPack(a, axis=0, dtype=None),
        ArgPack(a, 0, None, out),
        ArgPack(a, axis=0, dtype=None, out=out),
        ArgPack(a, out=out),
    ]]

    def time_add_reduce_arg_parsing(self, arg_pack):
        np.add.reduce(*arg_pack.args, **arg_pack.kwargs)