BUG: setting td64 value into Series[numeric] incorretly casts to int

pandas-dev · jreback · Feb 3, 2021 · Jan 30, 2021 · Jan 30, 2021 · Jan 30, 2021
commit 588a3ac4516b5b8315b2fa0f903796cd05ebeffd
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4331,7 +4331,14 @@ def where(self, cond, other=None):
         except (ValueError, TypeError):
             return self.astype(object).where(cond, other)
 
-        values = np.where(cond, values, other)
+        if isinstance(other, np.timedelta64) and self.dtype == object:
+            # http://github.com/numpy/numpy/issues/12550
+            #  timedelta64 will incorrectly cast to int
+            other = [other] * (~cond).sum()
+            values = values.copy()
+            values[~cond] = other
+        else:
+            values = np.where(cond, values, other)
 
         return Index(values, name=self.name)
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -29,7 +29,6 @@
     infer_dtype_from,
     maybe_downcast_numeric,
     maybe_downcast_to_dtype,
-    maybe_promote,
     maybe_upcast,
     soft_convert_objects,
 )
@@ -1031,6 +1030,12 @@ def putmask(self, mask, new) -> List[Block]:
         elif not mask.any():
             return [self]
 
+        elif isinstance(new, np.timedelta64):
+            # using putmask with object dtype will incorrect cast to object
+            # Having excluded self._can_hold_element, we know we cannot operate
+            #  in-place, so we are safe using `where`
+            return self.where(new, ~mask)
+
         else:
             # may need to upcast
             if transpose:
@@ -1052,7 +1057,7 @@ def f(mask, val, idx):
                         n = np.array(new)
 
                     # type of the new block
-                    dtype, _ = maybe_promote(n.dtype)
+                    dtype = find_common_type([n.dtype, val.dtype])
 
                     # we need to explicitly astype here to make a copy
                     n = n.astype(dtype)
@@ -1309,12 +1314,18 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]:
                 blocks = block.where(orig_other, cond, errors=errors, axis=axis)
                 return self._maybe_downcast(blocks, "infer")
 
-            # convert datetime to datetime64, timedelta to timedelta64
-            other = convert_scalar_for_putitemlike(other, values.dtype)
+            elif isinstance(other, np.timedelta64):
+                # expressions.where will cast np.timedelta64 to int
+                result = self.values.copy()
+                result[~cond] = [other] * (~cond).sum()
+
+            else:
+                # convert datetime to datetime64, timedelta to timedelta64
+                other = convert_scalar_for_putitemlike(other, values.dtype)
 
-            # By the time we get here, we should have all Series/Index
-            #  args extracted to ndarray
-            result = expressions.where(cond, values, other)
+                # By the time we get here, we should have all Series/Index
+                #  args extracted to ndarray
+                result = expressions.where(cond, values, other)
 
         if self._can_hold_na or self.ndim == 1:
 

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -494,3 +494,126 @@ def test_setitem_td64_into_complex(key, dtype, indexer_sli):
         indexer_sli(ser)[key] = np.full((1,), td)
         assert ser.dtype == object
         assert arr[0] == 0  # original array is unchanged
+
+
+class TestSetitemCastingEquivalentsTimedelta64IntoNumeric:
+    # timedelta64 should not be treated as integers when setting into
+    #  numeric Series
+
+    @pytest.fixture
+    def val(self):
+        td = np.timedelta64(4, "ns")
+        return td
+        return np.full((1,), td)
+
+    @pytest.fixture(params=[complex, int, float])
+    def dtype(self, request):
+        return request.param
+
+    @pytest.fixture
+    def obj(self, dtype):
+        arr = np.arange(5).astype(dtype)
+        ser = Series(arr)
+        return ser
+
+    @pytest.fixture
+    def expected(self, dtype):
+        arr = np.arange(5).astype(dtype)
+        ser = Series(arr)
+        ser = ser.astype(object)
+        ser.values[0] = np.timedelta64(4, "ns")
+        return ser
+
+    @pytest.fixture
+    def key(self):
+        return 0
+
+    def check_indexer(self, obj, key, expected, val, indexer):
+        orig = obj
+        obj = obj.copy()
+        arr = obj._values
+
+        indexer(obj)[key] = val
+        tm.assert_series_equal(obj, expected)
+
+        tm.assert_equal(arr, orig._values)  # original  array is unchanged
+
+    def test_int_key(self, obj, key, expected, val, indexer_sli):
+        if not isinstance(key, int):
+            return
+
+        self.check_indexer(obj, key, expected, val, indexer_sli)
+
+        rng = range(key, key + 1)
+        self.check_indexer(obj, rng, expected, val, indexer_sli)
+
+        if indexer_sli is not tm.loc:
+            # Note: no .loc because that handles slice edges differently
+            slc = slice(key, key + 1)
+            self.check_indexer(obj, slc, expected, val, indexer_sli)
+
+        ilkey = [key]
+        self.check_indexer(obj, ilkey, expected, val, indexer_sli)
+
+        indkey = np.array(ilkey)
+        self.check_indexer(obj, indkey, expected, val, indexer_sli)
+
+    def test_slice_key(self, obj, key, expected, val, indexer_sli):
+        if not isinstance(key, slice):
+            return
+
+        if indexer_sli is not tm.loc:
+            # Note: no .loc because that handles slice edges differently
+            self.check_indexer(obj, key, expected, val, indexer_sli)
+
+        ilkey = list(range(len(obj)))[key]
+        self.check_indexer(obj, ilkey, expected, val, indexer_sli)
+
+        indkey = np.array(ilkey)
+        self.check_indexer(obj, indkey, expected, val, indexer_sli)
+
+    def test_mask_key(self, obj, key, expected, val, indexer_sli):
+        # setitem with boolean mask
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        self.check_indexer(obj, mask, expected, val, indexer_sli)
+
+    def test_series_where(self, obj, key, expected, val):
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        orig = obj
+        obj = obj.copy()
+        arr = obj._values
+        res = obj.where(~mask, val)
+        tm.assert_series_equal(res, expected)
+
+        tm.assert_equal(arr, orig._values)  # original  array is unchanged
+
+    def test_index_where(self, obj, key, expected, val, request):
+        if Index(obj).dtype != obj.dtype:
+            pytest.skip("test not applicable for this dtype")
+
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        if obj.dtype == bool and not mask.all():
+            # When mask is all True, casting behavior does not apply
+            msg = "Index/Series casting behavior inconsistent GH#38692"
+            mark = pytest.mark.xfail(reason=msg)
+            request.node.add_marker(mark)
+
+        res = Index(obj).where(~mask, val)
+        tm.assert_index_equal(res, Index(expected))
+
+    @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
+    def test_index_putmask(self, obj, key, expected, val):
+        if Index(obj).dtype != obj.dtype:
+            pytest.skip("test not applicable for this dtype")
+
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        res = Index(obj).putmask(mask, val)
+        tm.assert_index_equal(res, Index(expected))