Skip to content

Commit

Permalink
[pycodestyle] Handle each cell separately for `too-many-newlines-at…
Browse files Browse the repository at this point in the history
…-end-of-file` (`W391`) (#15308)

Jupyter notebooks are converted into source files by joining with
newlines, which confuses the check [too-many-newlines-at-end-of-file
(W391)](https://docs.astral.sh/ruff/rules/too-many-newlines-at-end-of-file/#too-many-newlines-at-end-of-file-w391).
This PR introduces logic to apply the check cell-wise (and, in
particular, correctly handles empty cells.)

Closes #13763
  • Loading branch information
dylwil3 authored Jan 9, 2025
1 parent d0b2bbd commit b0905c4
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 31 deletions.
92 changes: 92 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/W391.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# just a comment in this cell"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# a comment and some newlines\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"1 + 1\n",
"# a comment\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"1+1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
6 changes: 5 additions & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,11 @@ pub(crate) fn check_tokens(
}

if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) {
pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens);
pycodestyle::rules::too_many_newlines_at_end_of_file(
&mut diagnostics,
tokens,
cell_offsets,
);
}

diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule()));
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ mod tests {
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_2.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_3.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_4.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391.ipynb"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
use std::iter::Peekable;

use itertools::Itertools;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_parser::{TokenKind, Tokens};
use ruff_notebook::CellOffsets;
use ruff_python_parser::{Token, TokenKind, Tokens};
use ruff_text_size::{Ranged, TextRange, TextSize};

/// ## What it does
/// Checks for files with multiple trailing blank lines.
///
/// In the case of notebooks, this check is applied to
/// each cell separately.
///
/// ## Why is this bad?
/// Trailing blank lines in a file are superfluous.
///
Expand All @@ -23,17 +30,19 @@ use ruff_text_size::{Ranged, TextRange, TextSize};
#[derive(ViolationMetadata)]
pub(crate) struct TooManyNewlinesAtEndOfFile {
num_trailing_newlines: u32,
in_notebook: bool,
}

impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
#[derive_message_formats]
fn message(&self) -> String {
let domain = if self.in_notebook { "cell" } else { "file" };
// We expect a single trailing newline; so two trailing newlines is one too many, three
// trailing newlines is two too many, etc.
if self.num_trailing_newlines > 2 {
"Too many newlines at end of file".to_string()
format!("Too many newlines at end of {domain}")
} else {
"Extra newline at end of file".to_string()
format!("Extra newline at end of {domain}")
}
}

Expand All @@ -48,42 +57,92 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
}

/// W391
pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec<Diagnostic>, tokens: &Tokens) {
let mut num_trailing_newlines = 0u32;
let mut start: Option<TextSize> = None;
let mut end: Option<TextSize> = None;

// Count the number of trailing newlines.
for token in tokens.iter().rev() {
match token.kind() {
TokenKind::NonLogicalNewline | TokenKind::Newline => {
if num_trailing_newlines == 0 {
end = Some(token.end());
pub(crate) fn too_many_newlines_at_end_of_file(
diagnostics: &mut Vec<Diagnostic>,
tokens: &Tokens,
cell_offsets: Option<&CellOffsets>,
) {
let mut tokens_iter = tokens.iter().rev().peekable();

if let Some(cell_offsets) = cell_offsets {
diagnostics.extend(notebook_newline_diagnostics(tokens_iter, cell_offsets));
} else if let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, false) {
diagnostics.push(diagnostic);
};
}

/// Collects trailing newline diagnostics for each cell
fn notebook_newline_diagnostics<'a>(
mut tokens_iter: Peekable<impl Iterator<Item = &'a Token>>,
cell_offsets: &CellOffsets,
) -> Vec<Diagnostic> {
let mut results = Vec::new();
let offset_iter = cell_offsets.iter().rev();

// NB: When interpreting the below, recall that the iterators
// have been reversed.
for &offset in offset_iter {
// Advance to offset
tokens_iter
.peeking_take_while(|tok| tok.end() >= offset)
.for_each(drop);

let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, true) else {
continue;
};

results.push(diagnostic);
}
results
}

/// Possible diagnostic, with fix, for too many newlines in cell or source file
fn newline_diagnostic<'a>(
tokens_iter: &mut Peekable<impl Iterator<Item = &'a Token>>,
in_notebook: bool,
) -> Option<Diagnostic> {
let mut num_trailing_newlines: u32 = 0;
let mut newline_range_start: Option<TextSize> = None;
let mut newline_range_end: Option<TextSize> = None;

while let Some(next_token) = tokens_iter.peek() {
match next_token.kind() {
TokenKind::Newline | TokenKind::NonLogicalNewline => {
if newline_range_end.is_none() {
newline_range_end = Some(next_token.end());
}
start = Some(token.end());
newline_range_start = Some(next_token.end());

tokens_iter.next();
num_trailing_newlines += 1;
}
TokenKind::Dedent => continue,
TokenKind::Dedent => {
tokens_iter.next();
}
_ => {
break;
}
}
}

if num_trailing_newlines == 0 || num_trailing_newlines == 1 {
return;
}

let range = match (start, end) {
(Some(start), Some(end)) => TextRange::new(start, end),
_ => return,
return None;
};
let mut diagnostic = Diagnostic::new(
TooManyNewlinesAtEndOfFile {
num_trailing_newlines,
},
range,
);
diagnostic.set_fix(Fix::safe_edit(Edit::range_deletion(range)));
diagnostics.push(diagnostic);

let (start, end) = (match (newline_range_start, newline_range_end) {
(Some(s), Some(e)) => Some((s, e)),
_ => None,
})?;

let diagnostic_range = TextRange::new(start, end);
Some(
Diagnostic::new(
TooManyNewlinesAtEndOfFile {
num_trailing_newlines,
in_notebook,
},
diagnostic_range,
)
.with_fix(Fix::safe_edit(Edit::range_deletion(diagnostic_range))),
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
W391.ipynb:5:1: W391 [*] Too many newlines at end of cell
|
3 | # just a comment in this cell
4 | # a comment and some newlines
5 | /
6 | |
7 | |
8 | |
| |_^ W391
9 | 1 + 1
10 | # a comment
|
= help: Remove trailing newlines

Safe fix
3 3 | # just a comment in this cell
4 4 | # a comment and some newlines
5 5 |
6 |-
7 |-
8 |-
9 6 | 1 + 1
10 7 | # a comment
11 8 |

W391.ipynb:11:1: W391 [*] Too many newlines at end of cell
|
9 | 1 + 1
10 | # a comment
11 | /
12 | |
13 | |
14 | |
15 | |
| |_^ W391
16 | 1+1
|
= help: Remove trailing newlines

Safe fix
9 9 | 1 + 1
10 10 | # a comment
11 11 |
12 |-
13 |-
14 |-
15 |-
16 12 | 1+1
17 13 |
18 14 |

W391.ipynb:17:1: W391 [*] Too many newlines at end of cell
|
16 | 1+1
17 | /
18 | |
19 | |
20 | |
21 | |
| |_^ W391
|
= help: Remove trailing newlines

Safe fix
15 15 |
16 16 | 1+1
17 17 |
18 |-
19 |-
20 |-
21 |-

0 comments on commit b0905c4

Please sign in to comment.