This project develops a toy language (JLC) that combines features of C and Java.
The compiler consists of two parts: the front end and the back end.
The front end is generated by BNFC, using the syntax configuration file javalette.cf under src.
The back end also consists of two parts: the type checker and the LLVM generator.
Additionally, for easy debugging of the syntax configuration, a simple web interface is provided to show the parsed AST. You can find it at ast_viewer.
Here is a sample JLC program and the LLVM IR (.ll) generated from it:
int main()
{
printString("Hello World");
return 0;
}
;-----------llvm code-----------
@zero_size_array = constant [1 x i32] [i32 0]
@str_0 = constant [12 x i8] c"Hello World\00"
; --- internal global variables start ---
; --- internal global variables end ---
; --- internal functions start ---
declare void @printInt(i32)
declare void @printDouble(double)
declare void @printString(ptr)
declare i32 @readInt()
declare double @readDouble()
declare ptr @malloc(i32)
declare ptr @calloc(i32, i32)
declare ptr @gen_nda(ptr, i32, i32)
; --- internal functions end ---
; --- defined enum start ---
; --- defined enum end ---
; --- defined struct start ---
; --- defined struct end ---
; --- defined class start ---
; --- defined class end ---
define i32 @main() {
entry_0:
; printString ("Hello World");
call void @printString(ptr @str_0)
ret i32 0
}
;-----------llvm code-----------
git clone xxx
# pull the submodule, which includes the test suite
git submodule update --init
- Compiling this project doesn't require any additional dependencies.
- However, running it with LLVM requires the LLVM package (>= 16.0.0) to be installed.
- If you want to regenerate the parser (i.e. after modifying the syntax configuration), you need to install the BNFC package.
make gen
The code under parser part will be regenerated.
make jlc
jlc is the compiler we use to compile xxx.jl files.
make test
It compiles all the test_xxx.cpp files under test.
The scripts under scripts are for running and debugging.
make jlc -j12
bash scripts/build_and_run.sh test_code/temp_op.jl
You will get:
OK
Running the generated llvm ir code:
-----------------------------------
Hello World
-----------------------------------
You can find the logs of the tests we ran under testlog.
# only test the parser:
bash scripts/run_test_parser.sh
# only test the basic cases with llvm
bash scripts/run_test_llvm.sh
# test llvm with extension
bash scripts/run_test_llvm_ext.sh
In this stage, type checking is separated into several detailed sub-stages. The code is located under src/typechecker.
It includes the following stages:
- user-defined type declaration jlc_tc_udt_dcl.cpp
- user defined type definition jlc_tc_udt_def.cpp
- function declaration jlc_tc_func_dcl.cpp
- function definition jlc_tc_func_def.cpp
Instead of using the LLVM development framework directly, we implemented our own set of APIs (llvm.cpp) to generate LLVM code.
More specific design details will be discussed in the following documents.
Control flow
Array
Enum
Struct
Class
Runtime polymorphism
- INT
- DOUBLE
- BOOLEAN
- STRING (cannot currently be declared as a variable).
int main(){
int a = 1;
double b = 1.0;
boolean c = false;
printInt(a);
printDouble(b);
if(c==false){
printString("c is false");
}
return 0;
}
int main(){
if(true){
printString("if-block");
}
if(false){
}else{
printString("if-else block");
}
int i = 0;
while (i<5){
i++;
}
return 0;
}
enum Color {
RED,
GREEN,
BLUE
};
int main(){
Color color = Color.RED;
if(color == Color.RED){
printString("color is RED");
}
return 0;
}
typedef struct A_t * A;
struct A_t{
int a;
};
// or
typedef struct B_t* B;
struct B_t {
int b;
};
int main(){
// define a struct variable
A a = new A_t;
// access member
a->a = 1;
printInt(a->a);
return 0;
}
class A {
int val;
void incr () {val++; return;}
int value () {return val;}
}
int main(){
A a = new A;
a.incr();
printInt(a.val);
return 0;
}
int main(){
int[] a = new int[3];
a[0] = 1;
a[1] = 2;
a[2] = 3;
printInt(a.length);
for(int e: a){
printInt(e);
}
int[][] b = new int[2][2];
return 0;
}
enum Color {
RED,
GREEN,
BLUE
};
typedef struct A_t * A;
struct A_t{
int a;
};
class B {
int val;
void incr () {val++; return;}
int value () {return val;}
}
int main(){
Color[] c = new Color[10];
# the elements of an A[] array are not initialized automatically;
# the user needs to initialize each one
A[] a = new A[10];
a[0] = new A_t;
B[] b = new B[10];
b[0] = new B;
return 0;
}
class A {
void f(){
printString("I am A");
}
}
class B extends A {
void f(){
printString("I am B");
}
}
class C extends B {
void f(){
printString("I am C");
}
}
int main(){
A a = new A;
A b = new B;
A c = new C;
a.f();
b.f();
c.f();
return 0;
}
op | example | comment |
---|---|---|
+ | a=a+b; | |
- | a=a-b; a= -b; -b; | |
++ | a++; | |
-- | a--; | |
\* | a=a*b; | |
/ | a=a/b; | |
% | a=a%b; | |
! | !true==false; | |
== | a==b | |
!= | a!=b | |
> | a>b | |
>= | a>=b | |
< | a<b | |
<= | a<=b | |
() | (int)0.0 | |
-> | a->a | only for struct |
[] | a[0] | |
. | a.a | for enum,class |
new | new A; | |