Skip to content

Commit

Permalink
Automatically detect the C stack global. (garrettgu10#6)
Browse files Browse the repository at this point in the history
If a function uses the C stack, the first instruction always seems to be a
global.get with the C stack pointer global variable. Therefore, we guess the C
stack global by looking for the global that appears most frequently in an
initial global.get.

Fixes garrettgu10#3.
  • Loading branch information
nneonneo authored Oct 8, 2021
1 parent 0a28bdb commit 4405a88
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 15 deletions.
15 changes: 4 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,10 @@ Module to load WebAssembly files into Ghidra, supporting disassembly and decompi
## Tips

- Many Wasm programs, especially those compiled by Emscripten or Clang, use a
global variable to store the C stack pointer. Real programs often make heavy use
of the C stack; it's the only place to store variables that are larger than a
single u32/u64, for example, or variables which require physical memory
addresses. In order to allow Ghidra to analyze the C stack, set the "C Stack
Pointer" in the Wasm Pre-Analyzer settings during initial analysis to the index
of the global variable which is being used as the stack pointer (this will be
the global used in the `stackSave`/`stackRestore` functions, if present, or the
global used in the function prologue of any functions which use the C stack).
Setting this option will cause Ghidra to analyze global.set/global.get
operations involving the targeted global as stack pointer manipulations, which
will allow the decompiler to recover C stack variables and objects.
global variable to store the C stack pointer. This plugin will attempt to
automatically detect the C stack pointer during analysis; if detection fails,
you may need to specify it manually, before running initial analysis, via the
"C Stack Pointer" option in the Wasm Pre-Analyzer settings.
- By default, the C stack is assumed to grow in the negative direction, i.e.
towards smaller addresses. However, compilers are actually free to choose either
stack direction, and both positive and negative-growing stacks have been
Expand Down
71 changes: 68 additions & 3 deletions src/main/java/wasm/analysis/WasmPreAnalyzer.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
package wasm.analysis;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import ghidra.app.services.AbstractAnalyzer;
import ghidra.app.services.AnalysisPriority;
import ghidra.app.services.AnalyzerType;
import ghidra.app.util.bin.BinaryReader;
import ghidra.app.util.bin.MemoryByteProvider;
import ghidra.app.util.bin.format.dwarf4.LEB128;
import ghidra.app.util.importer.MessageLog;
import ghidra.framework.options.Options;
import ghidra.program.disassemble.Disassembler;
import ghidra.program.disassemble.DisassemblerMessageListener;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.lang.Processor;
Expand All @@ -23,10 +30,12 @@ public class WasmPreAnalyzer extends AbstractAnalyzer {
private final static String NAME = "Wasm Pre-Analyzer";
private final static String DESCRIPTION = "Analyze Wasm code before disassembly to resolve operand sizes and jump offsets";

private final static int CSTACK_GLOBAL_DISABLE = -1;
private final static int CSTACK_GLOBAL_AUTO = -2;

private final static String OPTION_NAME_CSTACK_GLOBAL = "C Stack Pointer";
private static final String OPTION_DESCRIPTION_CSTACK_GLOBAL = "0-based index of the global variable being used as the C stack pointer. Set to -1 to disable C stack inference.";
/* Default to global0, which is what Emscripten appears to do */
private final static int OPTION_DEFAULT_CSTACK_GLOBAL = -1;
private static final String OPTION_DESCRIPTION_CSTACK_GLOBAL = "0-based index of the global variable being used as the C stack pointer. Set to -1 to disable C stack inference. Set to -2 to guess C stack pointer automatically (default).";
private final static int OPTION_DEFAULT_CSTACK_GLOBAL = CSTACK_GLOBAL_AUTO;
private int cStackGlobal = OPTION_DEFAULT_CSTACK_GLOBAL;

public WasmPreAnalyzer() {
Expand Down Expand Up @@ -54,11 +63,67 @@ public void optionsChanged(Options options, Program program) {
cStackGlobal = options.getInt(OPTION_NAME_CSTACK_GLOBAL, cStackGlobal);
}

/**
 * Inspects the first instruction at the given address and, if it is a
 * {@code global.get}, reports the index of the global it loads.
 *
 * @param program     program whose memory is read
 * @param funcAddress address at which the first instruction is expected
 * @return the 0-based global index operand of the leading {@code global.get},
 *         or -1 if the first byte is not the {@code global.get} opcode
 * @throws IOException if the bytes cannot be read, or if the index does not
 *                     fit in a non-negative 32-bit integer
 */
private int guessCStackGlobalForFunction(Program program, Address funcAddress) throws IOException {
    BinaryReader codeReader = new BinaryReader(new MemoryByteProvider(program.getMemory(), funcAddress), true);

    /*
     * Look for a global.get (opcode 0x23) at the start of the function, and
     * assume that it loads the C stack pointer if present
     */
    if (codeReader.readNextUnsignedByte() != 0x23) {
        return -1;
    }

    /*
     * The global index is an unsigned LEB128 value. Decoding it as signed would
     * turn indices such as 64 into negative numbers, and a lone 0x7f byte into
     * -1, which is indistinguishable from the "no guess" sentinel above.
     */
    return LEB128.readAsUInt32(codeReader);
}

/**
 * Guesses which global is the C stack pointer by tallying, over all
 * non-imported functions, the global loaded by a leading {@code global.get},
 * and picking the one seen most often.
 *
 * @param program   program containing the functions
 * @param functions function signatures to scan
 * @param monitor   task monitor used for progress reporting and cancellation
 * @return the index of the most frequently seen global,
 *         {@code CSTACK_GLOBAL_DISABLE} if no function yielded a guess, or
 *         {@code CSTACK_GLOBAL_AUTO} if the monitor was cancelled mid-scan
 */
private int guessCStackGlobal(Program program, List<WasmFuncSignature> functions, TaskMonitor monitor) {
    /* global index -> number of functions whose first instruction loads it */
    Map<Integer, Integer> votes = new HashMap<>();

    monitor.setMessage("Analyzing C stack...");
    monitor.initialize(functions.size());

    for (WasmFuncSignature candidate : functions) {
        if (monitor.isCancelled()) {
            return CSTACK_GLOBAL_AUTO;
        }
        monitor.incrementProgress(1);

        /* Imported functions are skipped entirely */
        if (!candidate.isImport()) {
            try {
                int globalIndex = guessCStackGlobalForFunction(program, candidate.getStartAddr());
                if (globalIndex != -1) {
                    votes.merge(globalIndex, 1, Integer::sum);
                }
            } catch (IOException e) {
                /* A read failure on one function does not stop the scan */
                Msg.error(this, "Failed to analyze function " + candidate.getName(), e);
            }
        }
    }

    /* Pick the entry with the strictly highest tally; disabled if there are none */
    int winner = CSTACK_GLOBAL_DISABLE;
    int winnerVotes = -1;
    for (Map.Entry<Integer, Integer> tally : votes.entrySet()) {
        int seen = tally.getValue();
        if (seen > winnerVotes) {
            winner = tally.getKey();
            winnerVotes = seen;
        }
    }

    Msg.info(this, "Guessed C stack global: " + winner);
    return winner;
}

@Override
public boolean added(Program program, AddressSetView set, TaskMonitor monitor, MessageLog log) throws CancelledException {
monitor.setMessage("Parsing module...");
WasmAnalysis state = WasmAnalysis.getState(program);
List<WasmFuncSignature> functions = state.getFunctions();

if (cStackGlobal == CSTACK_GLOBAL_AUTO) {
cStackGlobal = guessCStackGlobal(program, functions, monitor);
}

monitor.setMessage("Analyzing functions...");
monitor.initialize(functions.size());

Disassembler disassembler = Disassembler.getDisassembler(program, monitor, new DisassemblerMessageListener() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static WasmNameSubsection createSubsection(BinaryReader reader) throws IO
case NAME_LOCAL:
return new WasmNameLocalSubsection(sectionReader);
case NAME_LABELS:
// not supported at the moment
// TODO: not supported at the moment
return new WasmNameUnknownSubsection(sectionReader);
case NAME_TYPE:
return new WasmNameMapSubsection("type", sectionReader);
Expand Down

0 comments on commit 4405a88

Please sign in to comment.