Skip to content

BUG: setting td64 value into numeric Series incorrectly casting to int #39488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 3, 2021
Next Next commit
BUG: setting td64 value into Series[numeric] incorrectly casts to int
  • Loading branch information
jbrockmendel committed Jan 30, 2021
commit 588a3ac4516b5b8315b2fa0f903796cd05ebeffd
9 changes: 8 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4331,7 +4331,14 @@ def where(self, cond, other=None):
except (ValueError, TypeError):
return self.astype(object).where(cond, other)

values = np.where(cond, values, other)
if isinstance(other, np.timedelta64) and self.dtype == object:
# http://github.com/numpy/numpy/issues/12550
# timedelta64 will incorrectly cast to int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

too bad you cannot use np.where then cast back the integers if necessary

other = [other] * (~cond).sum()
values = values.copy()
values[~cond] = other
else:
values = np.where(cond, values, other)

return Index(values, name=self.name)

Expand Down
25 changes: 18 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
infer_dtype_from,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_promote,
maybe_upcast,
soft_convert_objects,
)
Expand Down Expand Up @@ -1031,6 +1030,12 @@ def putmask(self, mask, new) -> List[Block]:
elif not mask.any():
return [self]

elif isinstance(new, np.timedelta64):
# using putmask with object dtype will incorrectly cast to object
# Having excluded self._can_hold_element, we know we cannot operate
# in-place, so we are safe using `where`
return self.where(new, ~mask)

else:
# may need to upcast
if transpose:
Expand All @@ -1052,7 +1057,7 @@ def f(mask, val, idx):
n = np.array(new)

# type of the new block
dtype, _ = maybe_promote(n.dtype)
dtype = find_common_type([n.dtype, val.dtype])

# we need to explicitly astype here to make a copy
n = n.astype(dtype)
Expand Down Expand Up @@ -1309,12 +1314,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
blocks = block.where(orig_other, cond, errors=errors, axis=axis)
return self._maybe_downcast(blocks, "infer")

# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)
elif isinstance(other, np.timedelta64):
# expressions.where will cast np.timedelta64 to int
result = self.values.copy()
result[~cond] = [other] * (~cond).sum()

else:
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
result = expressions.where(cond, values, other)
# By the time we get here, we should have all Series/Index
# args extracted to ndarray
result = expressions.where(cond, values, other)

if self._can_hold_na or self.ndim == 1:

Expand Down
123 changes: 123 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,126 @@ def test_setitem_td64_into_complex(key, dtype, indexer_sli):
indexer_sli(ser)[key] = np.full((1,), td)
assert ser.dtype == object
assert arr[0] == 0 # original array is unchanged


class TestSetitemCastingEquivalentsTimedelta64IntoNumeric:
    """
    Check that several equivalent ways of setting a timedelta64 scalar into
    a numeric Series all give the same object-dtype result.
    """

    # timedelta64 should not be treated as integers when setting into
    # numeric Series

    @pytest.fixture
    def val(self):
        """The timedelta64 scalar that is set into the numeric object."""
        return np.timedelta64(4, "ns")

    @pytest.fixture(params=[complex, int, float])
    def dtype(self, request):
        """Numeric dtype of the Series being set into."""
        return request.param

    @pytest.fixture
    def obj(self, dtype):
        """Series holding ``arange(5)`` cast to ``dtype``."""
        arr = np.arange(5).astype(dtype)
        ser = Series(arr)
        return ser

    @pytest.fixture
    def expected(self, dtype):
        """
        Expected result: the same values as ``obj`` cast to object dtype,
        with the first entry replaced by the timedelta64 scalar.
        """
        arr = np.arange(5).astype(dtype)
        ser = Series(arr)
        ser = ser.astype(object)
        ser.values[0] = np.timedelta64(4, "ns")
        return ser

    @pytest.fixture
    def key(self):
        """Position at which the value is set."""
        return 0

    def check_indexer(self, obj, key, expected, val, indexer):
        """
        Set ``val`` at ``key`` on a copy of ``obj`` through ``indexer`` and
        compare the result against ``expected``.
        """
        orig = obj
        obj = obj.copy()
        # keep a reference to the copy's values as they were before setting
        arr = obj._values

        indexer(obj)[key] = val
        tm.assert_series_equal(obj, expected)

        tm.assert_equal(arr, orig._values)  # original array is unchanged

    def test_int_key(self, obj, key, expected, val, indexer_sli):
        """Integer key and its range/slice/list/ndarray equivalents."""
        if not isinstance(key, int):
            # report a skip instead of silently passing without any check
            pytest.skip("Not relevant for int key")

        self.check_indexer(obj, key, expected, val, indexer_sli)

        rng = range(key, key + 1)
        self.check_indexer(obj, rng, expected, val, indexer_sli)

        if indexer_sli is not tm.loc:
            # Note: no .loc because that handles slice edges differently
            slc = slice(key, key + 1)
            self.check_indexer(obj, slc, expected, val, indexer_sli)

        ilkey = [key]
        self.check_indexer(obj, ilkey, expected, val, indexer_sli)

        indkey = np.array(ilkey)
        self.check_indexer(obj, indkey, expected, val, indexer_sli)

    def test_slice_key(self, obj, key, expected, val, indexer_sli):
        """Slice key and its list/ndarray-of-positions equivalents."""
        if not isinstance(key, slice):
            # report a skip instead of silently passing without any check
            pytest.skip("Not relevant for slice key")

        if indexer_sli is not tm.loc:
            # Note: no .loc because that handles slice edges differently
            self.check_indexer(obj, key, expected, val, indexer_sli)

        ilkey = list(range(len(obj)))[key]
        self.check_indexer(obj, ilkey, expected, val, indexer_sli)

        indkey = np.array(ilkey)
        self.check_indexer(obj, indkey, expected, val, indexer_sli)

    def test_mask_key(self, obj, key, expected, val, indexer_sli):
        """Setting with a boolean mask that is True only at ``key``."""
        # setitem with boolean mask
        mask = np.zeros(obj.shape, dtype=bool)
        mask[key] = True

        self.check_indexer(obj, mask, expected, val, indexer_sli)

    def test_series_where(self, obj, key, expected, val):
        """``Series.where`` with the inverted mask matches ``expected``."""
        mask = np.zeros(obj.shape, dtype=bool)
        mask[key] = True

        orig = obj
        obj = obj.copy()
        arr = obj._values
        res = obj.where(~mask, val)
        tm.assert_series_equal(res, expected)

        tm.assert_equal(arr, orig._values)  # original array is unchanged

    def test_index_where(self, obj, key, expected, val, request):
        """``Index.where`` with the inverted mask matches ``Index(expected)``."""
        if Index(obj).dtype != obj.dtype:
            pytest.skip("test not applicable for this dtype")

        mask = np.zeros(obj.shape, dtype=bool)
        mask[key] = True

        if obj.dtype == bool and not mask.all():
            # When mask is all True, casting behavior does not apply
            msg = "Index/Series casting behavior inconsistent GH#38692"
            mark = pytest.mark.xfail(reason=msg)
            request.node.add_marker(mark)

        res = Index(obj).where(~mask, val)
        tm.assert_index_equal(res, Index(expected))

    @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
    def test_index_putmask(self, obj, key, expected, val):
        """``Index.putmask`` with the mask; currently marked as xfail."""
        if Index(obj).dtype != obj.dtype:
            pytest.skip("test not applicable for this dtype")

        mask = np.zeros(obj.shape, dtype=bool)
        mask[key] = True

        res = Index(obj).putmask(mask, val)
        tm.assert_index_equal(res, Index(expected))