From 6a6ce12152b5664c2bdaf7e39a82bbf398ba888e Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Fri, 23 Aug 2024 13:05:27 -0700 Subject: [PATCH] fix mypy errors --- benchmarking/tpch/data_generation.py | 2 +- benchmarking/tpch/pipelined_data_generation.py | 2 +- daft/context.py | 2 +- daft/dataframe/dataframe.py | 2 +- daft/expressions/expressions.py | 1 + tests/expressions/test_udf.py | 6 +++--- tests/integration/io/test_list_files_s3_minio.py | 2 +- tutorials/delta_lake/1-local-image-batch-inference.ipynb | 2 +- tutorials/delta_lake/2-distributed-batch-inference.ipynb | 2 +- tutorials/mnist.ipynb | 4 ++-- 10 files changed, 13 insertions(+), 12 deletions(-) diff --git a/benchmarking/tpch/data_generation.py b/benchmarking/tpch/data_generation.py index 1908828a40..84a7adca1f 100644 --- a/benchmarking/tpch/data_generation.py +++ b/benchmarking/tpch/data_generation.py @@ -253,7 +253,7 @@ def gen_csv_files(basedir: str, num_parts: int, scale_factor: float) -> str: Returns: str: path to folder with generated CSV files """ - cachedir = os.path.join(basedir, ("%.1f" % scale_factor).replace(".", "_"), str(num_parts)) + cachedir = os.path.join(basedir, (f"{scale_factor:.1f}").replace(".", "_"), str(num_parts)) if not os.path.exists(cachedir): # If running in CI, use a scale factor of 0.2 # Otherwise, check for SCALE_FACTOR env variable or default to 1 diff --git a/benchmarking/tpch/pipelined_data_generation.py b/benchmarking/tpch/pipelined_data_generation.py index 36d3629c67..f28063a990 100644 --- a/benchmarking/tpch/pipelined_data_generation.py +++ b/benchmarking/tpch/pipelined_data_generation.py @@ -48,7 +48,7 @@ def pipelined_data_generation( ): assert num_parts > 1, "script should only be used if num_parts > 1" - cachedir = pathlib.Path(scratch_dir) / ("%.1f" % scale_factor).replace(".", "_") / str(num_parts) + cachedir = pathlib.Path(scratch_dir) / (f"{scale_factor:.1f}").replace(".", "_") / str(num_parts) if not cachedir.exists(): logger.info("Cloning tpch dbgen repo") diff --git a/daft/context.py b/daft/context.py index f286c77c7b..38ef8545d5 100644 --- a/daft/context.py +++ b/daft/context.py @@ -17,7 +17,7 @@ class _RunnerConfig: - name = ClassVar[str] + name: ClassVar[str] @dataclasses.dataclass(frozen=True) diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index 3dd7458db4..37dea4d822 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -1984,7 +1984,7 @@ def transform(self, func: Callable[..., "DataFrame"], *args: Any, **kwargs: Any) """ result = func(self, *args, **kwargs) assert isinstance(result, DataFrame), ( - "Func returned an instance of type [%s], " "should have been DataFrame." % type(result) + f"Func returned an instance of type [{type(result)}], " "should have been DataFrame." ) return result diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py index 8de584035b..9f5085ac3a 100644 --- a/daft/expressions/expressions.py +++ b/daft/expressions/expressions.py @@ -116,6 +116,7 @@ def lit(value: object) -> Expression: lit_value = _time_lit(i64_value, time_unit) elif isinstance(value, Decimal): sign, digits, exponent = value.as_tuple() + assert isinstance(exponent, int) lit_value = _decimal_lit(sign == 1, digits, exponent) elif isinstance(value, Series): lit_value = _series_lit(value._series) diff --git a/tests/expressions/test_udf.py b/tests/expressions/test_udf.py index 1c6c00d3aa..2572eb1adc 100644 --- a/tests/expressions/test_udf.py +++ b/tests/expressions/test_udf.py @@ -154,11 +154,11 @@ def test_udf_return_containers(container, batch_size): @udf(return_dtype=DataType.string(), batch_size=batch_size) def identity(data): - if container == Series: + if container is Series: return data - elif container == list: + elif container is list: return data.to_pylist() - elif container == np.ndarray: + elif container is np.ndarray: return np.array(data.to_arrow()) else: raise NotImplementedError(f"Test not implemented for container type: {container}") diff --git a/tests/integration/io/test_list_files_s3_minio.py b/tests/integration/io/test_list_files_s3_minio.py index b98100ef5c..5cdfd59c68 100644 --- a/tests/integration/io/test_list_files_s3_minio.py +++ b/tests/integration/io/test_list_files_s3_minio.py @@ -213,7 +213,7 @@ def test_directory_globbing_fragment_wildcard(minio_io_config, path_expect_pair, for name in files: fs.touch(f"bucket/{name}") - if type(expect) == type and issubclass(expect, BaseException): + if type(expect) is type and issubclass(expect, BaseException): with pytest.raises(expect): io_glob(globpath, io_config=minio_io_config, fanout_limit=fanout_limit) else: diff --git a/tutorials/delta_lake/1-local-image-batch-inference.ipynb b/tutorials/delta_lake/1-local-image-batch-inference.ipynb index 3d39917ba1..96d2562975 100644 --- a/tutorials/delta_lake/1-local-image-batch-inference.ipynb +++ b/tutorials/delta_lake/1-local-image-batch-inference.ipynb @@ -381,7 +381,7 @@ " batch = self.preprocess(images_array)\n", " prediction = self.model(batch).softmax(0)\n", " class_ids = prediction.argmax(1)\n", - " scores = prediction[:, class_ids]\n", + " prediction[:, class_ids]\n", " return [self.category_map[class_id] for class_id in class_ids]" ] }, diff --git a/tutorials/delta_lake/2-distributed-batch-inference.ipynb b/tutorials/delta_lake/2-distributed-batch-inference.ipynb index 41a6bb315a..8462a74e0f 100644 --- a/tutorials/delta_lake/2-distributed-batch-inference.ipynb +++ b/tutorials/delta_lake/2-distributed-batch-inference.ipynb @@ -337,7 +337,7 @@ " batch = self.preprocess(images_array)\n", " prediction = self.model(batch).softmax(0)\n", " class_ids = prediction.argmax(1)\n", - " scores = prediction[:, class_ids]\n", + " prediction[:, class_ids]\n", " return [self.category_map[class_id] for class_id in class_ids]\n", "\n", "\n", diff --git a/tutorials/mnist.ipynb b/tutorials/mnist.ipynb index 776ca0937a..28973d1b47 100644 --- a/tutorials/mnist.ipynb +++ b/tutorials/mnist.ipynb @@ -235,7 +235,7 @@ "\n", "images_df = images_df.with_column(\n", " \"image_2d\",\n", - " col(\"image\").apply(lambda l: np.array(l).reshape(28, 28), return_dtype=DataType.python()),\n", + " col(\"image\").apply(lambda img: np.array(img).reshape(28, 28), return_dtype=DataType.python()),\n", ")" ] }, @@ -495,7 +495,7 @@ "\n", "class Net(nn.Module):\n", " def __init__(self):\n", - " super(Net, self).__init__()\n", + " super().__init__()\n", " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n", " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n", " self.dropout1 = nn.Dropout(0.25)\n",