From 5da7294754e2e49bd3a1b0e5f9fe363d0d1d66a1 Mon Sep 17 00:00:00 2001 From: Connor Duncan Date: Fri, 17 Jan 2025 14:00:58 -0600 Subject: [PATCH] Add support for @upper_case @lower_case captures --- README.md | 20 ++++++++ topiary-cli/Cargo.toml | 2 +- topiary-config/Cargo.toml | 3 +- topiary-core/src/atom_collection.rs | 72 ++++++++++++++++++++++++++++- topiary-core/src/lib.rs | 11 +++++ topiary-core/src/pretty.rs | 16 +++++-- 6 files changed, 117 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 15e2c7df..c530e688 100644 --- a/README.md +++ b/README.md @@ -1410,6 +1410,26 @@ The example below solves the problem of indenting function application in OCaml ) ``` +### `@lower_case`/`@upper_case` +Convert all of the text in the matched node to lower case (`@lower_case`) or upper case (`@upper_case`). +Use this with care in languages that are case-sensitive. + +```scheme +; example for SQL, since that's where this makes sense. +; I am using the grammar linked below +; https://github.com/DerekStride/tree-sitter-sql/tree/main + +; make keywords select,from lowercase. 
+( + [ + (keyword_select) + (keyword_from) + ] @lower_case +) +; make keyword WHERE uppercase +(keyword_where) @upper_case +``` + ### `#query_name!` When the logging verbosity is set to `-vv` or higher, Topiary outputs information about which queries are matched, for instance: diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index ad0bb884..ad4c8515 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -76,7 +76,7 @@ contributed = [ # Excluded by default experimental = [ - "rust", + "rust" ] bash = ["topiary-config/bash", "topiary-queries/bash"] diff --git a/topiary-config/Cargo.toml b/topiary-config/Cargo.toml index 94f92725..f2ecc0eb 100644 --- a/topiary-config/Cargo.toml +++ b/topiary-config/Cargo.toml @@ -49,6 +49,7 @@ ocamllex = [] rust = [] toml = [] tree_sitter_query = [] +sql = [] # This a convenience for the sake of downstream applications which don't # wish to cherry-pick grammars (e.g., the playground) @@ -62,5 +63,5 @@ all = [ "ocamllex", "rust", "toml", - "tree_sitter_query" + "tree_sitter_query", ] diff --git a/topiary-core/src/atom_collection.rs b/topiary-core/src/atom_collection.rs index a1e6d64f..50af01be 100644 --- a/topiary-core/src/atom_collection.rs +++ b/topiary-core/src/atom_collection.rs @@ -1,5 +1,6 @@ use std::{ borrow::Cow, + cmp::max, collections::{HashMap, HashSet}, mem, ops::Deref, @@ -8,7 +9,8 @@ use std::{ use topiary_tree_sitter_facade::Node; use crate::{ - tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, ScopeCondition, ScopeInformation, + tree_sitter::NodeExt, Atom, FormatterError, FormatterResult, HowCapitalize, ScopeCondition, + ScopeInformation, }; /// A struct that holds sets of node IDs that have line breaks before or after them. 
@@ -278,6 +280,14 @@ impl AtomCollection { self.prepend(Atom::DeleteBegin, node, predicates); self.append(Atom::DeleteEnd, node, predicates); } + "upper_case" => { + self.prepend(Atom::CaseBegin(HowCapitalize::UpperCase), node, predicates); + self.append(Atom::CaseEnd, node, predicates); + } + "lower_case" => { + self.prepend(Atom::CaseBegin(HowCapitalize::LowerCase), node, predicates); + self.append(Atom::CaseEnd, node, predicates); + } // Scope manipulation "prepend_begin_scope" => { self.prepend( @@ -538,6 +548,7 @@ impl AtomCollection { original_position: node.start_position().into(), single_line_no_indent: false, multi_line_indent_all: false, + how_capitalize: HowCapitalize::Pass, }); // Mark all sub-nodes as having this node as a "leaf parent" self.mark_leaf_parent(node, node.id()); @@ -591,6 +602,7 @@ impl AtomCollection { self.append.entry(target_node.id()).or_default().push(atom); } + // fn capitalize(&mut self, /// Expands a softline atom to a hardline, space or empty atom depending on /// if we are in a multiline context or not. /// @@ -883,6 +895,63 @@ impl AtomCollection { } } + fn post_process_capitalization(&mut self) { + let mut case_context: Vec = Vec::new(); + for atom in &mut self.atoms { + match atom { + Atom::CaseBegin(case) => { + match case { + HowCapitalize::UpperCase => { + case_context.push(HowCapitalize::UpperCase); + *atom = Atom::Empty; + /* + if lower_case_level > 0 { + panic!("Cannot use @lower_case inside of a node captured by @upper_case!"); + }*/ + } + HowCapitalize::LowerCase => { + case_context.push(HowCapitalize::LowerCase); + *atom = Atom::Empty; + /* + if upper_case_level > 0 { + panic!("Cannot use @upper_case inside of a node captured by @lower_case!"); + }*/ + } + _ => {} + } + } + Atom::CaseEnd => { + case_context.pop(); + *atom = Atom::Empty; + } + _ => match atom { + Atom::Leaf { + content, + id, + original_position, + single_line_no_indent, + multi_line_indent_all, + .. 
+ } => { + // TODO don't be stupid with derefs + *atom = Atom::Leaf { + content: (*content).to_string(), + id: *id, + original_position: *original_position, + single_line_no_indent: *single_line_no_indent, + multi_line_indent_all: *multi_line_indent_all, + how_capitalize: case_context + .last() + .unwrap_or(&HowCapitalize::Pass) + .clone(), + } + } + _ => {} + }, + } + } + } + /// This function merges the spaces, new lines and blank lines. /// If there are several tokens of different kind one after the other, /// the blank line is kept over the new line which itself is kept over the space. @@ -890,6 +959,7 @@ impl AtomCollection { pub fn post_process(&mut self) { self.post_process_scopes(); self.post_process_deletes(); + self.post_process_capitalization(); self.post_process_inner(); // We have taken care of spaces following an antispace. Now fix the diff --git a/topiary-core/src/lib.rs b/topiary-core/src/lib.rs index d03a38b9..2918ed37 100644 --- a/topiary-core/src/lib.rs +++ b/topiary-core/src/lib.rs @@ -38,6 +38,13 @@ pub struct ScopeInformation { scope_id: String, } +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub enum HowCapitalize { + UpperCase, + LowerCase, + #[default] + Pass, +} /// An atom represents a small piece of the output. We turn Tree-sitter nodes /// into atoms, and we add white-space atoms where appropriate. The final list /// of atoms is rendered to the output. @@ -68,6 +75,7 @@ pub enum Atom { single_line_no_indent: bool, // if the leaf is multi-line, each line will be indented, not just the first multi_line_indent_all: bool, + how_capitalize: HowCapitalize, }, /// Represents a literal string, such as a semicolon. Literal(String), @@ -86,6 +94,9 @@ pub enum Atom { // it might happen that it contains several leaves. DeleteBegin, DeleteEnd, + + CaseBegin(HowCapitalize), + CaseEnd, /// Indicates the beginning of a scope, use in combination with the /// ScopedSoftlines and ScopedConditionals below. 
ScopeBegin(ScopeInformation), diff --git a/topiary-core/src/pretty.rs b/topiary-core/src/pretty.rs index 8c94c285..d09379f8 100644 --- a/topiary-core/src/pretty.rs +++ b/topiary-core/src/pretty.rs @@ -4,7 +4,7 @@ use std::fmt::Write; -use crate::{Atom, FormatterError, FormatterResult}; +use crate::{Atom, FormatterError, FormatterResult, HowCapitalize}; /// Renders a slice of Atoms into an owned string. /// The indent &str is used when an `Atom::IdentStart` is encountered. @@ -44,6 +44,7 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult { original_position, single_line_no_indent, multi_line_indent_all, + how_capitalize, .. } => { if *single_line_no_indent { @@ -51,10 +52,9 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult { // as a `Hardline` in the atom stream. writeln!(buffer)?; } - let content = content.trim_end_matches('\n'); - let content = if *multi_line_indent_all { + let mut content = if *multi_line_indent_all { let cursor = current_column(&buffer) as i32; // original_position is 1-based @@ -71,7 +71,15 @@ pub fn render(atoms: &[Atom], indent: &str) -> FormatterResult { } else { content.into() }; - + match how_capitalize { + HowCapitalize::UpperCase => { + content = content.to_uppercase(); + } + HowCapitalize::LowerCase => { + content = content.to_lowercase(); + } + _ => {} + } write!(buffer, "{}", content)?; }