-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for JavaScript #59
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,220 @@ | ||||||
import io | ||||||
import os | ||||||
from ..utils import * | ||||||
from pathlib import Path | ||||||
from ...entities import * | ||||||
from ...graph import Graph | ||||||
from typing import Optional | ||||||
from ..analyzer import AbstractAnalyzer | ||||||
|
||||||
import tree_sitter_javascript as tsjs | ||||||
from tree_sitter import Language, Parser, Node | ||||||
|
||||||
JS_LANGUAGE = Language(tsjs.language()) | ||||||
|
||||||
import logging | ||||||
logger = logging.getLogger('code_graph') | ||||||
|
||||||
class JavaScriptAnalyzer(AbstractAnalyzer):
    """
    Analyzer that extracts functions, classes and function-call relationships
    from JavaScript (`.js`) source files using a tree-sitter parser.
    """

    def __init__(self) -> None:
        self.parser = Parser(JS_LANGUAGE)

    @staticmethod
    def _read_source(f: io.TextIOWrapper) -> bytes:
        """
        Read the whole file and return its content as bytes.

        The tree-sitter parser consumes bytes, so content coming from a handle
        opened in text mode is encoded as UTF-8 before it is returned.
        """

        content = f.read()
        if isinstance(content, str):
            content = content.encode('utf-8')
        return content

    @staticmethod
    def _captured_nodes(captures, name: str) -> list:
        """
        Return the nodes captured under `name`.

        Accepts both result shapes of `Query.captures`: a dict mapping capture
        names to node lists, and a list of `(node, capture_name)` tuples.
        """

        if isinstance(captures, dict):
            return list(captures.get(name, []))
        return [node for node, capture_name in captures if capture_name == name]

    def process_function_declaration(self, node: Node, path: Path, source_code: str) -> Optional[Function]:
        """
        Processes a function declaration node to extract function details.

        Args:
            node (Node): The AST node representing a function declaration.
            path (Path): The file path where the function is defined.
            source_code (str): The decoded source of the whole file; only used
                as a fallback when the node does not expose its own text.

        Returns:
            Optional[Function]: A Function object containing details about the function, or None if the function name cannot be determined.
        """

        # Extract function name
        res = find_child_of_type(node, 'identifier')
        if res is None:
            return None

        identifier = res[0]
        function_name = identifier.text.decode('utf-8')
        logger.info(f"Function declaration: {function_name}")

        # Extract function parameters.
        # Only plain identifier parameters are recorded; JavaScript carries no
        # type information, so every argument type is reported as 'Unknown'.
        args = []
        res = find_child_of_type(node, 'formal_parameters')
        if res is not None:
            parameters = res[0]
            args = [(child.text.decode('utf-8'), 'Unknown')
                    for child in parameters.children
                    if child.type == 'identifier']

        # Extract function definition line numbers
        start_line = node.start_point[0]
        end_line = node.end_point[0]

        # Extract the function source.
        # start_byte/end_byte are byte offsets, so slicing the decoded string
        # with them is wrong once the file contains multi-byte characters;
        # prefer the node's own bytes and only fall back to slicing.
        raw = node.text
        if raw is not None:
            src = raw.decode('utf-8', errors='replace')
        else:
            src = source_code[node.start_byte:node.end_byte]

        # Create Function object
        docs = ''
        f = Function(str(path), function_name, docs, 'Unknown', src, start_line, end_line)

        # Add arguments to Function object
        for name, type_ in args:
            f.add_argument(name, type_)

        return f

    def process_class_declaration(self, node: Node, path: Path) -> Optional[Class]:
        """
        Processes a class declaration node to extract class details.

        Args:
            node (Node): The AST node representing a class declaration.
            path (Path): The file path where the class is defined.

        Returns:
            Optional[Class]: A Class object containing details about the class, or None if the class name cannot be determined.
        """

        # Extract class name
        res = find_child_of_type(node, 'identifier')
        if res is None:
            return None

        identifier = res[0]
        class_name = identifier.text.decode('utf-8')
        logger.info(f"Class declaration: {class_name}")

        # Extract class definition line numbers
        start_line = node.start_point[0]
        end_line = node.end_point[0]

        # Create Class object
        docs = ''
        return Class(str(path), class_name, docs, start_line, end_line)

    def first_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None:
        """
        Register the entities declared in a JavaScript source file.

        Files whose suffix is not `.js` are skipped. Otherwise a File entity is
        added to the graph, the file is parsed, and every function declaration
        and class declaration found in it is added to the graph and linked to
        the File entity with a 'DEFINES' edge.

        Args:
            path (Path): The path to the JavaScript source file.
            f (io.TextIOWrapper): The file object representing the opened JavaScript source file.
            graph (Graph): The Graph object where entities will be added.

        Returns:
            None
        """

        if path.suffix != '.js':
            logger.debug(f"Skipping none JavaScript file {path}")
            return

        logger.info(f"Processing {path}")

        # Create file entity
        file = File(os.path.dirname(path), path.name, path.suffix)
        graph.add_file(file)

        # Parse file (the parser works on bytes)
        raw_source = self._read_source(f)
        tree = self.parser.parse(raw_source)
        try:
            source_code = raw_source.decode('utf-8')
        except UnicodeDecodeError as e:
            logger.error(f"Failed decoding source code: {e}")
            source_code = ''

        # Process function declarations
        query = JS_LANGUAGE.query("(function_declaration) @function")
        captures = query.captures(tree.root_node)
        for node in self._captured_nodes(captures, 'function'):
            entity = self.process_function_declaration(node, path, source_code)
            if entity is not None:
                graph.add_function(entity)
                graph.connect_entities('DEFINES', file.id, entity.id)

        # Process class declarations
        query = JS_LANGUAGE.query("(class_declaration) @class")
        captures = query.captures(tree.root_node)
        for node in self._captured_nodes(captures, 'class'):
            entity = self.process_class_declaration(node, path)
            if entity is not None:
                graph.add_class(entity)
                graph.connect_entities('DEFINES', file.id, entity.id)

    def second_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None:
        """
        Perform the second pass processing of a JavaScript source file to establish function call relationships.

        For every function declaration in the file, each call expression whose
        callee is a plain identifier inside that declaration is recorded as a
        'CALLS' edge from the declaring function to the callee. Callees that are
        not yet in the graph are added as placeholder functions.

        Args:
            path (Path): The path to the JavaScript source file.
            f (io.TextIOWrapper): The file object representing the opened JavaScript source file.
            graph (Graph): The Graph object containing entities (functions and files) to establish relationships.

        Returns:
            None
        """

        if path.suffix != '.js':
            logger.debug(f"Skipping none JavaScript file {path}")
            return

        logger.info(f"Processing {path}")

        # Get file entity
        file = graph.get_file(os.path.dirname(path), path.name, path.suffix)
        if file is None:
            logger.error(f"File entity not found for: {path}")
            return

        # NOTE(review): the file was already read and parsed during first_pass;
        # keeping that tree around would avoid this second read and parse.
        try:
            # Parse file
            content = self._read_source(f)
            tree = self.parser.parse(content)
        except Exception as e:
            logger.error(f"Failed to process file {path}: {e}")
            return

        # Locate function invocation
        query_call_exp = JS_LANGUAGE.query("(call_expression function: (identifier) @callee)")

        # Locate function definitions.
        # In the JavaScript grammar the identifier of a function_declaration is
        # exposed through the `name` field.
        query_function_def = JS_LANGUAGE.query("""
            (
                function_declaration
                    name: (identifier) @function_name
            )""")

        function_defs = self._captured_nodes(
            query_function_def.captures(tree.root_node), 'function_name')
        for caller in function_defs:
            caller_name = caller.text.decode('utf-8')
            caller_f = graph.get_function_by_name(caller_name)
            if caller_f is None:
                # The function was not registered during the first pass;
                # skip it instead of aborting the whole analysis.
                logger.warning(f"Function {caller_name} not found in graph, skipping its calls")
                continue

            # `caller` is the name identifier, so its parent is the enclosing
            # function_declaration; searching there keeps the call sites
            # limited to this function.
            function_calls = self._captured_nodes(
                query_call_exp.captures(caller.parent), 'callee')
            for callee in function_calls:
                callee_name = callee.text.decode('utf-8')
                callee_f = graph.get_function_by_name(callee_name)

                if callee_f is None:
                    # Create missing function
                    # Assuming this is a call to a native function
                    callee_f = Function('/', callee_name, None, None, None, 0, 0)
                    graph.add_function(callee_f)

                # Connect the caller and callee in the graph
                graph.connect_entities('CALLS', caller_f.id, callee_f.id)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,33 @@ def find_child_of_type(node: Node, child_type: str) -> Union[tuple[Node, int], N | |
return (child, idx) | ||
|
||
return None | ||
|
||
def extract_js_function_name(node: Node) -> str:
    """
    Return the name of a JavaScript function node.

    The name is taken from the first direct child of type 'identifier'.

    Args:
        node (Node): The AST node representing the function.

    Returns:
        str: The UTF-8 decoded function name, or an empty string when the node
        has no identifier child.
    """
    identifiers = (child for child in node.children if child.type == 'identifier')
    first_identifier = next(identifiers, None)
    if first_identifier is None:
        return ''
    return first_identifier.text.decode('utf-8')
|
||
def extract_js_class_name(node: Node) -> str:
    """
    Return the name of a JavaScript class node.

    The name is taken from the first direct child of type 'identifier'.

    Args:
        node (Node): The AST node representing the class.

    Returns:
        str: The UTF-8 decoded class name, or an empty string when the node
        has no identifier child.
    """
    class_name = ''
    for candidate in node.children:
        if candidate.type != 'identifier':
            continue
        # Only the first identifier child is considered
        class_name = candidate.text.decode('utf-8')
        break
    return class_name
Comment on lines
+25
to
+53
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Language-specific utilities shouldn't be added to the shared utils module. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Avoid `import *` from relatively unknown modules.
Using star imports (`from ..utils import *` and `from ...entities import *`) can cause namespace conflicts and obfuscate which names are actually used. Consider importing only the required objects or using explicit imports to maintain clearer code and prevent accidental overrides.
🧰 Tools
🪛 Ruff (0.8.2)
3-3: `from ..utils import *` used; unable to detect undefined names (F403)