Skip to content

Commit

Permalink
PERF: DataFrame.__setitem__ (pandas-dev#44796)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Dec 11, 2021
1 parent a48f451 commit 351b688
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 14 deletions.
3 changes: 2 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,7 +1457,8 @@ def iget(self, col):
def set_inplace(self, locs, values) -> None:
# NB: This is a misnomer, is supposed to be inplace but is not,
# see GH#33457
assert locs.tolist() == [0]
# When an ndarray, we should have locs.tolist() == [0]
# When a BlockPlacement we should have list(locs) == [0]
self.values = values
try:
# TODO(GH33457) this can be removed
Expand Down
70 changes: 57 additions & 13 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,22 +1066,12 @@ def iset(

# Note: we exclude DTA/TDA here
value_is_extension_type = is_1d_only_ea_dtype(value.dtype)

# categorical/sparse/datetimetz
if value_is_extension_type:

def value_getitem(placement):
return value

else:
if not value_is_extension_type:
if value.ndim == 2:
value = value.T
else:
value = ensure_block_shape(value, ndim=2)

def value_getitem(placement):
return value[placement.indexer]

if value.shape[1:] != self.shape[1:]:
raise AssertionError(
"Shape of new values must be compatible with manager shape"
Expand All @@ -1092,11 +1082,37 @@ def value_getitem(placement):
# In this case, get_blkno_placements will yield only one tuple,
# containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))

# Check if we can use _iset_single fastpath
blkno = self.blknos[loc]
blk = self.blocks[blkno]
if len(blk._mgr_locs) == 1: # TODO: fastest way to check this?
return self._iset_single(
# error: Argument 1 to "_iset_single" of "BlockManager" has
# incompatible type "Union[int, slice, ndarray[Any, Any]]";
# expected "int"
loc, # type:ignore[arg-type]
value,
inplace=inplace,
blkno=blkno,
blk=blk,
)

# error: Incompatible types in assignment (expression has type
# "List[Union[int, slice, ndarray]]", variable has type "Union[int,
# slice, ndarray]")
loc = [loc] # type: ignore[assignment]

# categorical/sparse/datetimetz
if value_is_extension_type:

def value_getitem(placement):
return value

else:

def value_getitem(placement):
return value[placement.indexer]

# Accessing public blknos ensures the public versions are initialized
blknos = self.blknos[loc]
blklocs = self.blklocs[loc].copy()
Expand Down Expand Up @@ -1172,6 +1188,29 @@ def value_getitem(placement):
# Newly created block's dtype may already be present.
self._known_consolidated = False

def _iset_single(
    self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block
) -> None:
    """
    Fastpath for ``iset`` covering a single target position whose current
    Block holds exactly one column.

    Because the Block being replaced is single-column, the whole Block can
    be swapped out while ``blklocs`` and ``blknos`` stay as they are.

    Parameters
    ----------
    loc : int
        Manager-level position being set.
    value : ArrayLike
        New values; the caller must already have validated ``value.shape``.
    inplace : bool
        Whether writing into the existing Block should be attempted.
    blkno : int
        Index of ``blk`` within ``self.blocks``.
    blk : Block
        The Block currently stored at ``loc``.
    """
    # Shape validation of ``value`` is the caller's responsibility.

    if not (inplace and blk.should_store(value)):
        # Cannot (or should not) reuse the existing Block: build a fresh
        # one at the same placement and splice it into the blocks tuple.
        replacement = new_block_2d(value, placement=blk._mgr_locs)
        current = self.blocks
        self.blocks = current[:blkno] + (replacement,) + current[blkno + 1 :]
        return

    # The existing Block can hold ``value``; hand it over directly.
    pos = self.blklocs[loc]
    blk.set_inplace(slice(pos, pos + 1), value)

def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
"""
Insert item at selected position.
Expand All @@ -1197,8 +1236,13 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
bp = BlockPlacement(slice(loc, loc + 1))
block = new_block_2d(values=value, placement=bp)

self._insert_update_mgr_locs(loc)
self._insert_update_blklocs_and_blknos(loc)
if not len(self.blocks):
# Fastpath
self._blklocs = np.array([0], dtype=np.intp)
self._blknos = np.array([0], dtype=np.intp)
else:
self._insert_update_mgr_locs(loc)
self._insert_update_blklocs_and_blknos(loc)

self.axes[0] = new_axis
self.blocks += (block,)
Expand Down

0 comments on commit 351b688

Please sign in to comment.