Skip to content

Commit

Permalink
Drop old OptStack option, rename StackLoop to StringEncOptLocal
Browse files Browse the repository at this point in the history
  • Loading branch information
antoniofrighetto committed Jan 27, 2025
1 parent 2ec6f40 commit 57cda4f
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 175 deletions.
11 changes: 3 additions & 8 deletions src/core/python/pyobf_opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,18 @@ py::module_ &py_init_obf_opt(py::module_ &m) {
)delim")
.def(py::init<>());

py::class_<StringEncOptStack>(m, "StringEncOptStack",
py::class_<StringEncOptLocal>(m, "StringEncOptLocal",
R"delim(
Option for the :meth:`omvll.ObfuscationConfig.obfuscate_string` protection.
This option protects the string with a stack decoding.
This option protects the string lazily when used within the function.
.. danger::
For large strings, this option can introduce a **huge** overhead if the `loopThreshold` is not used.
)delim")
.def(py::init<>())
.def(py::init<size_t>(),
R"delim(
Constructor that defines the string length threshold from which the decoding routine must be looped.
)delim",
"loopThreshold"_a);
.def(py::init<>());

py::class_<StringEncOptReplace>(m, "StringEncOptReplace",
R"delim(
Expand Down
28 changes: 9 additions & 19 deletions src/include/omvll/passes/string-encoding/StringEncoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ struct StringEncoding : llvm::PassInfoMixin<StringEncoding> {

enum EncodingTy {
None = 0,
Stack,
StackLoop,
Local,
Global,
Replace,
};
Expand All @@ -62,14 +61,10 @@ struct StringEncoding : llvm::PassInfoMixin<StringEncoding> {
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
const EncodingInfo &Info);
bool injectOnStack(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
const EncodingInfo &Info);
bool injectOnStackLoop(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
const EncodingInfo &Info);
bool injectDecodingLocally(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
const EncodingInfo &Info);
bool process(llvm::Instruction &I, llvm::Use &Op, llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data, StringEncodingOpt &Opt);
bool processReplace(llvm::Instruction &I, llvm::Use &Op,
Expand All @@ -78,15 +73,10 @@ struct StringEncoding : llvm::PassInfoMixin<StringEncoding> {
StringEncOptReplace &Rep);
bool processGlobal(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
StringEncOptGlobal &Global);
bool processOnStack(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data,
const StringEncOptStack &Stack);
bool processOnStackLoop(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data);
llvm::ConstantDataSequential &Data);
bool processLocal(llvm::Instruction &I, llvm::Use &Op,
llvm::GlobalVariable &G,
llvm::ConstantDataSequential &Data);

inline EncodingInfo *getEncoding(const llvm::GlobalVariable &GV) {
if (auto It = GVarEncInfo.find(&GV); It != GVarEncInfo.end())
Expand Down
15 changes: 4 additions & 11 deletions src/include/omvll/passes/string-encoding/StringEncodingOpt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@

namespace omvll {

// Option selecting the stack-based decoding of a protected string.
struct StringEncOptStack {
  // String length (in bytes) from which the decoding routine is emitted as a
  // loop instead of being fully unrolled.
  size_t LoopThreshold{10};
};

// Tag types used as alternatives of the string-encoding option variant. They
// carry no data; only the type that is held matters.

// The string is left untouched (the pass reports no change for it).
struct StringEncOptSkip {};
// The string is handled by the "global" processing routine of the pass.
struct StringEncOptGlobal {};
// The string is handled by the "local" processing routine of the pass, i.e.
// decoded within the function that uses it.
struct StringEncOptLocal {};
// No explicit choice was made: the pass picks the strategy itself.
struct StringEncOptDefault {};

struct StringEncOptReplace {
Expand All @@ -25,12 +22,8 @@ struct StringEncOptReplace {
std::string NewString;
};

using StringEncodingOpt = std::variant<
StringEncOptSkip,
StringEncOptStack,
StringEncOptGlobal,
StringEncOptReplace,
StringEncOptDefault
>;
// Per-string protection choice: holds exactly one of the option types listed
// here. The string-encoding pass dispatches on the held alternative.
using StringEncodingOpt =
std::variant<StringEncOptSkip, StringEncOptLocal, StringEncOptGlobal,
StringEncOptReplace, StringEncOptDefault>;

} // end namespace omvll
162 changes: 27 additions & 135 deletions src/passes/string-encoding/StringEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static CallInst *
createDecodingTrampoline(GlobalVariable &G, Use &EncPtr, Instruction *NewPt,
uint64_t KeyValI64, uint64_t Size,
const StringEncoding::EncodingInfo &EI,
bool IsPartOfStackVariable = false) {
bool IsLocalToFunction = false) {
// Allocas first.
auto It = NewPt->getFunction()->getEntryBlock().begin();
while (It->getOpcode() == Instruction::Alloca)
Expand Down Expand Up @@ -174,7 +174,7 @@ createDecodingTrampoline(GlobalVariable &G, Use &EncPtr, Instruction *NewPt,
Value *Input = IRB.CreateBitCast(&G, IRB.getInt8PtrTy());
Value *Output = Input;

if (IsPartOfStackVariable)
if (IsLocalToFunction)
Output = IRB.CreateInBoundsGEP(BufferTy, ClearBuffer,
{IRB.getInt64(0), IRB.getInt64(0)});

Expand Down Expand Up @@ -208,7 +208,7 @@ createDecodingTrampoline(GlobalVariable &G, Use &EncPtr, Instruction *NewPt,
ToString(*E), ToString(*V)));
}

if (!IsPartOfStackVariable)
if (!IsLocalToFunction)
return IRB.CreateCall(NewF->getFunctionType(), NewF, Args);

auto *BoolType = IRB.getInt1Ty();
Expand Down Expand Up @@ -396,80 +396,13 @@ bool StringEncoding::injectDecoding(Instruction &I, Use &Op, GlobalVariable &G,
case EncodingTy::Replace:
case EncodingTy::Global:
return false;
case EncodingTy::Stack:
return injectOnStack(I, Op, G, Data, Info);
case EncodingTy::StackLoop:
return injectOnStackLoop(I, Op, G, Data, Info);
case EncodingTy::Local:
return injectDecodingLocally(I, Op, G, Data, Info);
}

return false;
llvm_unreachable("Unhandled case");
}

/// Emit, right before \p I, an unrolled sequence that decodes the encoded
/// string into a freshly allocated buffer, then make \p I use that buffer
/// instead of the encoded global.
///
/// Each byte is decoded independently by XOR-ing the encoded byte with the
/// matching key byte; the bytes are processed in a shuffled order.
///
/// \param I    Instruction using the encoded string.
/// \param Op   Operand of \p I referencing the encoded string.
/// \param G    Encoded global variable (not used by this routine).
/// \param Data Encoded string content; only its size is read here.
/// \param Info Encoding information; its key must be a byte buffer.
/// \returns true once the operand has been replaced by the decoded buffer.
bool StringEncoding::injectOnStack(Instruction &I, Use &Op, GlobalVariable &G,
                                   ConstantDataSequential &Data,
                                   const StringEncoding::EncodingInfo &Info) {
  auto *KeyBytes = std::get_if<KeyBufferTy>(&Info.Key);
  if (KeyBytes == nullptr)
    fatalError("String stack decoding is expecting a buffer as a key");

  const uint64_t Len = Data.getRawDataValues().size();

  IRBuilder<NoFolder> IRB(I.getParent());
  IRB.SetInsertPoint(&I);

  // Buffer that receives the decoded string.
  AllocaInst *Decoded = IRB.CreateAlloca(IRB.getInt8Ty(), IRB.getInt32(Len));

  // Visit the byte positions in a random order.
  SmallVector<size_t, 20> Order;
  Order.reserve(Len);
  for (size_t Pos = 0; Pos < Len; ++Pos)
    Order.push_back(Pos);

  shuffle(Order.begin(), Order.end(), *RNG);

  for (size_t Pos : Order) {
    // Address of, then value of, the encoded byte at this position.
    Value *EncBase = IRB.CreatePointerCast(Op, IRB.getInt8PtrTy());
    Value *EncAddr = IRB.CreateGEP(IRB.getInt8Ty(), EncBase, IRB.getInt32(Pos));
    LoadInst *EncByte = IRB.CreateLoad(IRB.getInt8Ty(), EncAddr);
    addMetadata(*EncByte, MetaObf(ProtectFieldAccess));

    // The key byte transits through the destination slot: it is stored
    // there first and read back just after.
    Value *DstAddr = IRB.CreateGEP(IRB.getInt8Ty(), Decoded, IRB.getInt32(Pos));
    StoreInst *KeyStore =
        IRB.CreateStore(ConstantInt::get(IRB.getInt8Ty(), (*KeyBytes)[Pos]),
                        DstAddr, /* volatile */ true);
    addMetadata(*KeyStore, {
                               MetaObf(ProtectFieldAccess),
                               MetaObf(OpaqueCst),
                           });

    LoadInst *KeyByte = IRB.CreateLoad(IRB.getInt8Ty(), DstAddr);
    addMetadata(*KeyByte, MetaObf(ProtectFieldAccess));

    // clear = key ^ encoded
    Value *Clear = IRB.CreateXor(KeyByte, EncByte);
    if (auto *XorInst = dyn_cast<Instruction>(Clear))
      addMetadata(*XorInst, MetaObf(OpaqueOp, 2LLU));

    // Overwrite the key byte with the decoded one.
    StoreInst *ClearStore =
        IRB.CreateStore(Clear, DstAddr, /* volatile */ true);
    addMetadata(*ClearStore, MetaObf(ProtectFieldAccess));
  }

  I.setOperand(Op.getOperandNo(), Decoded);
  return true;
}

bool StringEncoding::injectOnStackLoop(
bool StringEncoding::injectDecodingLocally(
Instruction &I, Use &Op, GlobalVariable &G, ConstantDataSequential &Data,
const StringEncoding::EncodingInfo &Info) {
auto *Key = std::get_if<KeyIntTy>(&Info.Key);
Expand All @@ -487,28 +420,20 @@ bool StringEncoding::injectOnStackLoop(
bool StringEncoding::process(Instruction &I, Use &Op, GlobalVariable &G,
ConstantDataSequential &Data,
StringEncodingOpt &Opt) {
bool Changed =
std::visit(overloaded{
[&](StringEncOptSkip &) { return false; },
[&](StringEncOptStack &Stack) {
return processOnStack(I, Op, G, Data, Stack);
},
[&](StringEncOptGlobal &Global) {
return processGlobal(I, Op, G, Data, Global);
},
[&](StringEncOptReplace &Rep) {
return processReplace(I, Op, G, Data, Rep);
},
[&](StringEncOptDefault &) {
if (Data.getElementByteSize() < 20) {
StringEncOptStack Stack{6};
return processOnStack(I, Op, G, Data, Stack);
}
StringEncOptGlobal Global;
return processGlobal(I, Op, G, Data, Global);
},
},
Opt);
bool Changed = std::visit(
overloaded{
[&](StringEncOptSkip &) { return false; },
[&](StringEncOptLocal &) { return processLocal(I, Op, G, Data); },
[&](StringEncOptGlobal &) { return processGlobal(I, Op, G, Data); },
[&](StringEncOptReplace &Rep) {
return processReplace(I, Op, G, Data, Rep);
},
[&](StringEncOptDefault &) {
// Default to local, if no option is specified.
return processLocal(I, Op, G, Data);
},
},
Opt);
return Changed;
}

Expand Down Expand Up @@ -537,8 +462,7 @@ bool StringEncoding::processReplace(Instruction &I, Use &Op, GlobalVariable &G,
}

bool StringEncoding::processGlobal(Instruction &I, Use &Op, GlobalVariable &G,
ConstantDataSequential &Data,
StringEncOptGlobal &Global) {
ConstantDataSequential &Data) {
Module *M = I.getModule();
LLVMContext &Ctx = I.getContext();
StringRef Str = Data.getRawDataValues();
Expand Down Expand Up @@ -634,9 +558,8 @@ void StringEncoding::annotateRoutine(Module &M) {
}
}

bool StringEncoding::processOnStackLoop(Instruction &I, Use &Op,
GlobalVariable &G,
ConstantDataSequential &Data) {
bool StringEncoding::processLocal(Instruction &I, Use &Op, GlobalVariable &G,
ConstantDataSequential &Data) {
LLVMContext &Ctx = I.getContext();
StringRef Str = Data.getRawDataValues();
uint64_t StrSz = Str.size();
Expand All @@ -647,7 +570,7 @@ bool StringEncoding::processOnStackLoop(Instruction &I, Use &Op,
SDEBUG("Key for {}: 0x{:010x}", Str.str(), Key);

std::vector<uint8_t> Encoded(StrSz);
EncodingInfo EI(EncodingTy::StackLoop);
EncodingInfo EI(EncodingTy::Local);
EI.Key = Key;

genRoutines(Triple(I.getModule()->getTargetTriple()), EI, Ctx);
Expand All @@ -664,39 +587,8 @@ bool StringEncoding::processOnStackLoop(Instruction &I, Use &Op,
G.setInitializer(StrEnc);

auto It = GVarEncInfo.insert({&G, std::move(EI)}).first;
return injectOnStackLoop(I, Op, G, *cast<ConstantDataSequential>(StrEnc),
It->getSecond());
}

bool StringEncoding::processOnStack(Instruction &I, Use &Op, GlobalVariable &G,
ConstantDataSequential &Data,
const StringEncOptStack &Stack) {
StringRef Str = Data.getRawDataValues();
uint64_t StrSz = Str.size();
std::uniform_int_distribution<uint8_t> Dist(1, 254);

SDEBUG("[{}] {}: {}", name(), I.getFunction()->getName(),
Data.isCString() ? Data.getAsCString() : "<encoded>");

if (StrSz >= Stack.LoopThreshold)
return processOnStackLoop(I, Op, G, Data);

std::vector<uint8_t> Encoded(StrSz);
std::vector<uint8_t> Key(StrSz);
std::generate(std::begin(Key), std::end(Key),
[&Dist, this]() { return Dist(*RNG); });

for (size_t I = 0; I < StrSz; ++I)
Encoded[I] = static_cast<uint8_t>(Str[I]) ^ static_cast<uint8_t>(Key[I]);

Constant *StrEnc = ConstantDataArray::get(I.getContext(), Encoded);
G.setInitializer(StrEnc);

EncodingInfo EI(EncodingTy::Stack);
EI.Key = std::move(Key);
auto It = GVarEncInfo.insert({&G, std::move(EI)}).first;
return injectOnStack(I, Op, G, *cast<ConstantDataSequential>(StrEnc),
It->getSecond());
return injectDecodingLocally(I, Op, G, *cast<ConstantDataSequential>(StrEnc),
It->getSecond());
}

} // end namespace omvll
4 changes: 2 additions & 2 deletions src/test/passes/string-encoding/config_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ def obfuscate_string(self, _, __, string: bytes):
if string.endswith(b".cpp"):
return omvll.StringEncOptGlobal()
if string.endswith(b"Swift"):
return omvll.StringEncOptStack()
return omvll.StringEncOptLocal()
if string.endswith(b"Stack"):
return omvll.StringEncOptStack()
return omvll.StringEncOptLocal()

@lru_cache(maxsize=1)
def omvll_get_config() -> omvll.ObfuscationConfig:
Expand Down

0 comments on commit 57cda4f

Please sign in to comment.