diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 44cc9f6d8d8c0..6a35a41e47649 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -4,7 +4,6 @@ from typing import ( TYPE_CHECKING, Sequence, - cast, ) import warnings @@ -221,27 +220,23 @@ def concatenate_managers( return BlockManager((nb,), axes) mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers) - if len(mgrs_indexers) == 1: - mgr, indexers = mgrs_indexers[0] - # Assertion correct but disabled for perf: - # assert not indexers - if copy: - out = mgr.copy(deep=True) - else: - out = mgr.copy(deep=False) - out.axes = axes - return out concat_plan = _get_combined_plan([mgr for mgr, _ in mgrs_indexers]) blocks = [] - values: ArrayLike for placement, join_units in concat_plan: unit = join_units[0] blk = unit.block - if _is_uniform_join_units(join_units): + if len(join_units) == 1: + values = blk.values + if copy: + values = values.copy() + else: + values = values.view() + fastpath = True + elif _is_uniform_join_units(join_units): vals = [ju.block.values for ju in join_units] if not blk.is_extension: @@ -532,7 +527,8 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: if upcasted_na is None and self.block.dtype.kind != "V": # No upcasting is necessary - return self.block.values + fill_value = self.block.fill_value + values = self.block.values else: fill_value = upcasted_na @@ -544,13 +540,30 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: # we want to avoid filling with np.nan if we are # using None; we already know that we are all # nulls - values = cast(np.ndarray, self.block.values) - if values.size and values[0, 0] is None: + values = self.block.values.ravel(order="K") + if len(values) and values[0] is None: fill_value = None return make_na_array(empty_dtype, self.block.shape, fill_value) - return self.block.values + if not self.block._can_consolidate: + # preserve these for validation in concat_compat + return self.block.values + + if self.block.is_bool: + # External code requested filling/upcasting, bool values must + # be upcasted to object to avoid being upcasted to numeric. + values = self.block.astype(np.dtype("object")).values + else: + # No dtype upcasting is done here, it will be performed during + # concatenation itself. + values = self.block.values + + # If there's no indexing to be done, we want to signal outside + # code that this array must be copied explicitly. This is done + # by returning a view and checking `retval.base`. + values = values.view() + return values def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike: @@ -567,7 +580,19 @@ def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike for ju in join_units ] - if any(is_1d_only_ea_dtype(t.dtype) for t in to_concat): + if len(to_concat) == 1: + # Only one block, nothing to concatenate. + concat_values = to_concat[0] + if copy: + if isinstance(concat_values, np.ndarray): + # non-reindexed (=not yet copied) arrays are made into a view + # in JoinUnit.get_reindexed_values + if concat_values.base is not None: + concat_values = concat_values.copy() + else: + concat_values = concat_values.copy() + + elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat): # TODO(EA2D): special case not needed if all EAs used HybridBlocks # error: No overload variant of "__getitem__" of "ExtensionArray" matches @@ -633,6 +658,10 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj ------- dtype """ + if len(join_units) == 1: + blk = join_units[0].block + return blk.dtype, blk.dtype + if lib.dtypes_all_equal([ju.block.dtype for ju in join_units]): empty_dtype = join_units[0].block.dtype return empty_dtype, empty_dtype @@ -693,4 +722,7 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: # no blocks that would get missing values (can lead to type upcasts) # unless we're an extension dtype. all(not ju.is_na or ju.block.is_extension for ju in join_units) - ) + and + # only use this path when there is something to concatenate + len(join_units) > 1 + ) \ No newline at end of file