Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for @upper_case @lower_case captures #838

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1410,6 +1410,26 @@ The example below solves the problem of indenting function application in OCaml
)
```

### `@lower_case`/`@upper_case`
Convert all of the text in the matched node to lower case (`@lower_case`) or upper case (`@upper_case`).
Use this with care in languages that are case sensitive.

```scheme
; Example for SQL, a language where this makes sense because keywords are case-insensitive.
; This example uses the grammar linked below:
; https://github.com/DerekStride/tree-sitter-sql/tree/main
; Make the keywords `select` and `from` lowercase.
(
[
(keyword_select)
(keyword_from)
] @lower_case
)
; make keyword WHERE uppercase
(keyword_where) @upper_case
```

### `#query_name!`

When the logging verbosity is set to `-vv` or higher, Topiary outputs information about which queries are matched, for instance:
Expand Down
2 changes: 1 addition & 1 deletion topiary-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ contributed = [

# Excluded by default
experimental = [
"rust",
"rust"
]

bash = ["topiary-config/bash", "topiary-queries/bash"]
Expand Down
3 changes: 2 additions & 1 deletion topiary-config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ ocamllex = []
rust = []
toml = []
tree_sitter_query = []
sql = []

# This is a convenience for the sake of downstream applications which don't
# wish to cherry-pick grammars (e.g., the playground)
Expand All @@ -62,5 +63,5 @@ all = [
"ocamllex",
"rust",
"toml",
"tree_sitter_query"
"tree_sitter_query",
]
72 changes: 71 additions & 1 deletion topiary-core/src/atom_collection.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{
borrow::Cow,
cmp::max,
collections::{HashMap, HashSet},
mem,
ops::Deref,
Expand All @@ -8,7 +9,8 @@ use std::{
use topiary_tree_sitter_facade::Node;

use crate::{
tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation,
tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, HowCapitalize, ScopeCondition,
ScopeInformation,
};

/// A struct that holds sets of node IDs that have line breaks before or after them.
Expand Down Expand Up @@ -278,6 +280,14 @@ impl AtomCollection {
self.prepend(Atom::DeleteBegin, node, predicates);
self.append(Atom::DeleteEnd, node, predicates);
}
"upper_case" => {
self.prepend(Atom::CaseBegin(HowCapitalize::UpperCase), node, predicates);
self.append(Atom::CaseEnd, node, predicates);
}
"lower_case" => {
self.prepend(Atom::CaseBegin(HowCapitalize::LowerCase), node, predicates);
self.append(Atom::CaseEnd, node, predicates);
}
// Scope manipulation
"prepend_begin_scope" => {
self.prepend(
Expand Down Expand Up @@ -538,6 +548,7 @@ impl AtomCollection {
original_position: node.start_position().into(),
single_line_no_indent: false,
multi_line_indent_all: false,
how_capitalize: HowCapitalize::Pass,
});
// Mark all sub-nodes as having this node as a "leaf parent"
self.mark_leaf_parent(node, node.id());
Expand Down Expand Up @@ -591,6 +602,7 @@ impl AtomCollection {
self.append.entry(target_node.id()).or_default().push(atom);
}

// fn capitalize(&mut self,
/// Expands a softline atom to a hardline, space or empty atom depending on
/// if we are in a multiline context or not.
///
Expand Down Expand Up @@ -883,13 +895,71 @@ impl AtomCollection {
}
}

/// Resolves the `CaseBegin`/`CaseEnd` marker atoms into a per-leaf
/// capitalization mode.
///
/// The atoms are walked in order while a stack of the currently active
/// capitalization modes is maintained:
///
/// - `Atom::CaseBegin(mode)` pushes `mode` onto the stack,
/// - `Atom::CaseEnd` pops the most recently pushed mode,
/// - every `Atom::Leaf` gets its `how_capitalize` field set to the
///   innermost active mode, or to `HowCapitalize::Pass` when no capture
///   is active.
///
/// Both kinds of marker are replaced by `Atom::Empty` once they have been
/// consumed.
///
/// Nested captures are resolved innermost-first: a `@lower_case` capture
/// inside a node captured by `@upper_case` (or the other way around)
/// overrides the outer mode for its own leaves instead of being an error.
/// This lets a query change the case of a whole node except for a subset
/// of it.
fn post_process_capitalization(&mut self) {
    let mut case_context: Vec<HowCapitalize> = Vec::new();

    for atom in &mut self.atoms {
        match atom {
            Atom::CaseBegin(case) => {
                // Every begin marker is pushed, whatever its mode, so that
                // the matching `CaseEnd` always pops the entry it belongs to.
                case_context.push(case.clone());
                *atom = Atom::Empty;
            }
            Atom::CaseEnd => {
                // An unmatched end marker pops nothing; `Vec::pop` on an
                // empty stack simply returns `None`.
                case_context.pop();
                *atom = Atom::Empty;
            }
            Atom::Leaf { how_capitalize, .. } => {
                // Only this one field changes, so it is updated in place
                // rather than rebuilding the leaf and copying its content.
                *how_capitalize = case_context
                    .last()
                    .cloned()
                    .unwrap_or(HowCapitalize::Pass);
            }
            _ => {}
        }
    }
}

/// This function merges the spaces, new lines and blank lines.
/// If there are several tokens of different kind one after the other,
/// the blank line is kept over the new line which itself is kept over the space.
/// Furthermore, this function puts the indentation delimiters before any space/line atom.
pub fn post_process(&mut self) {
self.post_process_scopes();
self.post_process_deletes();
self.post_process_capitalization();
self.post_process_inner();

// We have taken care of spaces following an antispace. Now fix the
Expand Down
11 changes: 11 additions & 0 deletions topiary-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ pub struct ScopeInformation {
scope_id: String,
}

/// The case conversion to apply to the text of a leaf when it is rendered.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum HowCapitalize {
    /// Convert the text to upper case.
    UpperCase,
    /// Convert the text to lower case.
    LowerCase,
    /// Leave the text exactly as it is. This is the default.
    #[default]
    Pass,
}
/// An atom represents a small piece of the output. We turn Tree-sitter nodes
/// into atoms, and we add white-space atoms where appropriate. The final list
/// of atoms is rendered to the output.
Expand Down Expand Up @@ -68,6 +75,7 @@ pub enum Atom {
single_line_no_indent: bool,
// if the leaf is multi-line, each line will be indented, not just the first
multi_line_indent_all: bool,
how_capitalize: HowCapitalize,
},
/// Represents a literal string, such as a semicolon.
Literal(String),
Expand All @@ -86,6 +94,9 @@ pub enum Atom {
// it might happen that it contains several leaves.
DeleteBegin,
DeleteEnd,

CaseBegin(HowCapitalize),
CaseEnd,
/// Indicates the beginning of a scope, use in combination with the
/// ScopedSoftlines and ScopedConditionals below.
ScopeBegin(ScopeInformation),
Expand Down
16 changes: 12 additions & 4 deletions topiary-core/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use std::fmt::Write;

use crate::{Atom, FormatterError, FormatterResult};
use crate::{Atom, FormatterError, FormatterResult, HowCapitalize};

/// Renders a slice of Atoms into an owned string.
/// The indent &str is used when an `Atom::IndentStart` is encountered.
Expand Down Expand Up @@ -44,17 +44,17 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult<String> {
original_position,
single_line_no_indent,
multi_line_indent_all,
how_capitalize,
..
} => {
if *single_line_no_indent {
// The line break after the content has been previously added
// as a `Hardline` in the atom stream.
writeln!(buffer)?;
}

let content = content.trim_end_matches('\n');

let content = if *multi_line_indent_all {
let mut content = if *multi_line_indent_all {
let cursor = current_column(&buffer) as i32;

// original_position is 1-based
Expand All @@ -71,7 +71,15 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult<String> {
} else {
content.into()
};

match how_capitalize {
HowCapitalize::UpperCase => {
content = content.to_uppercase();
}
HowCapitalize::LowerCase => {
content = content.to_lowercase();
}
_ => {}
}
write!(buffer, "{}", content)?;
}

Expand Down