Skip to content

Commit

Permalink
[pylint] Detect pathlib.Path.open calls in unspecified-encoding
Browse files Browse the repository at this point in the history
… (`PLW1514`) (#11288)

<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #11263

Detect `pathlib.Path.open` calls which do not specify a file encoding.

## Test Plan

Test cases added to fixture.

---------

Co-authored-by: Dhruv Manilawala <[email protected]>
  • Loading branch information
augustelalande and dhruvmanila authored May 9, 2024
1 parent c80c171 commit dd42961
Show file tree
Hide file tree
Showing 4 changed files with 322 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,28 @@ def func(*args, **kwargs):
(("test.txt")),
# comment
)

# pathlib
from pathlib import Path

# Errors.
Path("foo.txt").open()
Path("foo.txt").open("w")
text = Path("foo.txt").read_text()
Path("foo.txt").write_text(text)

# Non-errors.
Path("foo.txt").open(encoding="utf-8")
Path("foo.txt").open("wb")
Path("foo.txt").open(*args)
Path("foo.txt").open(**kwargs)
text = Path("foo.txt").read_text(encoding="utf-8")
text = Path("foo.txt").read_text(*args)
text = Path("foo.txt").read_text(**kwargs)
Path("foo.txt").write_text(text, encoding="utf-8")
Path("foo.txt").write_text(text, *args)
Path("foo.txt").write_text(text, **kwargs)

# Violation but not detectable
x = Path("foo.txt")
x.open()
166 changes: 117 additions & 49 deletions crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::fmt::{Display, Formatter};

use anyhow::Result;

use ast::StringLiteralFlags;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_ast::name::QualifiedName;
use ruff_python_ast::Expr;
use ruff_python_ast::{self as ast, Expr, StringLiteralFlags};
use ruff_python_semantic::SemanticModel;
use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -43,7 +44,7 @@ use crate::settings::types::PythonVersion;
#[violation]
pub struct UnspecifiedEncoding {
function_name: String,
mode: Mode,
mode: ModeArgument,
}

impl AlwaysFixableViolation for UnspecifiedEncoding {
Expand All @@ -55,10 +56,10 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {
} = self;

match mode {
Mode::Supported => {
ModeArgument::Supported => {
format!("`{function_name}` in text mode without explicit `encoding` argument")
}
Mode::Unsupported => {
ModeArgument::Unsupported => {
format!("`{function_name}` without explicit `encoding` argument")
}
}
Expand All @@ -71,11 +72,9 @@ impl AlwaysFixableViolation for UnspecifiedEncoding {

/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some((function_name, mode)) = checker
.semantic()
.resolve_qualified_name(&call.func)
.filter(|qualified_name| is_violation(call, qualified_name))
.map(|qualified_name| (qualified_name.to_string(), Mode::from(&qualified_name)))
let Some((function_name, mode)) = Callee::try_from_call_expression(call, checker.semantic())
.filter(|segments| is_violation(call, segments))
.map(|segments| (segments.to_string(), segments.mode_argument()))
else {
return;
};
Expand All @@ -97,6 +96,68 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall)
checker.diagnostics.push(diagnostic);
}

/// Represents the path of the function or method being called.
enum Callee<'a> {
/// Fully-qualified symbol name of the callee.
Qualified(QualifiedName<'a>),
/// Attribute value for the `pathlib.Path(...)` call e.g., `open` in
/// `pathlib.Path(...).open(...)`.
Pathlib(&'a str),
}

impl<'a> Callee<'a> {
fn try_from_call_expression(
call: &'a ast::ExprCall,
semantic: &'a SemanticModel,
) -> Option<Self> {
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = call.func.as_ref() {
// Check for `pathlib.Path(...).open(...)` or equivalent
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
if semantic
.resolve_qualified_name(func)
.is_some_and(|qualified_name| {
matches!(qualified_name.segments(), ["pathlib", "Path"])
})
{
return Some(Callee::Pathlib(attr));
}
}
}

if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
return Some(Callee::Qualified(qualified_name));
}

None
}

fn mode_argument(&self) -> ModeArgument {
match self {
Callee::Qualified(qualified_name) => match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => ModeArgument::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
ModeArgument::Supported
}
["io" | "_io", "TextIOWrapper"] => ModeArgument::Unsupported,
_ => ModeArgument::Unsupported,
},
Callee::Pathlib(attr) => match *attr {
"open" => ModeArgument::Supported,
_ => ModeArgument::Unsupported,
},
}
}
}

impl Display for Callee<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Callee::Qualified(qualified_name) => f.write_str(&qualified_name.to_string()),
Callee::Pathlib(attr) => f.write_str(&format!("pathlib.Path(...).{attr}")),
}
}
}

/// Generate an [`Edit`] for Python 3.10 and later.
fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
Fix::unsafe_edit(add_argument(
Expand Down Expand Up @@ -146,7 +207,7 @@ fn is_binary_mode(expr: &Expr) -> Option<bool> {
}

/// Returns `true` if the given call lacks an explicit `encoding`.
fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
fn is_violation(call: &ast::ExprCall, qualified_name: &Callee) -> bool {
// If we have something like `*args`, which might contain the encoding argument, abort.
if call.arguments.args.iter().any(Expr::is_starred_expr) {
return false;
Expand All @@ -160,54 +221,61 @@ fn is_violation(call: &ast::ExprCall, qualified_name: &QualifiedName) -> bool {
{
return false;
}
match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
match qualified_name {
Callee::Qualified(qualified_name) => match qualified_name.segments() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
let mode_pos = usize::from(qualified_name.segments()[1] == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
["tempfile", tempfile_class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] =>
{
let mode_pos = usize::from(*tempfile_class == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
} else {
// defaults to binary mode
return false;
}
} else {
// defaults to binary mode
return false;
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
["io" | "_io", "TextIOWrapper"] => {
call.arguments.find_argument("encoding", 1).is_none()
}
_ => false,
},
Callee::Pathlib(attr) => match *attr {
"open" => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 0) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 2).is_none()
}
"read_text" => call.arguments.find_argument("encoding", 0).is_none(),
"write_text" => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
},
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
enum ModeArgument {
/// The call supports a `mode` argument.
Supported,
/// The call does not support a `mode` argument.
Unsupported,
}

impl From<&QualifiedName<'_>> for Mode {
fn from(value: &QualifiedName<'_>) -> Self {
match value.segments() {
["" | "codecs" | "_io", "open"] => Mode::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
Mode::Supported
}
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
_ => Mode::Unsupported,
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -352,5 +352,86 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
69 |+ (("test.txt")), encoding="locale",
70 70 | # comment
71 71 | )
72 72 |

unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
|
= help: Add explicit `encoding` argument

Unsafe fix
74 74 | from pathlib import Path
75 75 |
76 76 | # Errors.
77 |-Path("foo.txt").open()
77 |+Path("foo.txt").open(encoding="locale")
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)

unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode without explicit `encoding` argument
|
76 | # Errors.
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
| ^^^^^^^^^^^^^^^^^^^^ PLW1514
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument

Unsafe fix
75 75 |
76 76 | # Errors.
77 77 | Path("foo.txt").open()
78 |-Path("foo.txt").open("w")
78 |+Path("foo.txt").open("w", encoding="locale")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)
81 81 |

unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without explicit `encoding` argument
|
77 | Path("foo.txt").open()
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
| ^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
80 | Path("foo.txt").write_text(text)
|
= help: Add explicit `encoding` argument

Unsafe fix
76 76 | # Errors.
77 77 | Path("foo.txt").open()
78 78 | Path("foo.txt").open("w")
79 |-text = Path("foo.txt").read_text()
79 |+text = Path("foo.txt").read_text(encoding="locale")
80 80 | Path("foo.txt").write_text(text)
81 81 |
82 82 | # Non-errors.

unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without explicit `encoding` argument
|
78 | Path("foo.txt").open("w")
79 | text = Path("foo.txt").read_text()
80 | Path("foo.txt").write_text(text)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
81 |
82 | # Non-errors.
|
= help: Add explicit `encoding` argument

Unsafe fix
77 77 | Path("foo.txt").open()
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 |-Path("foo.txt").write_text(text)
80 |+Path("foo.txt").write_text(text, encoding="locale")
81 81 |
82 82 | # Non-errors.
83 83 | Path("foo.txt").open(encoding="utf-8")
Loading

0 comments on commit dd42961

Please sign in to comment.