bilingual
DownUnderCTF 2025: rev/bilingual
Challenge Description
Two languages are better than one!
Regards, FozzieBear (cybears)
Approach & Solution
We are given this script bilingual.py:
DATA = "eNrtfQt8k0XW96RNei8p0mBBxIDBFhAoTXUrpZp........."
import argparse, base64, ctypes, zlib, pathlib, sys
PASSWORD = "cheese"
FLAG = "jqsD0um75+TyJR3z0GbHwBQ+PLIdSJ+rojVscEL4IYkCOZ6+a5H1duhcq+Ub9Oa+ZWKuL703"
KEY = "68592cb91784620be98eca41f825260c"
HELPER = None
def decrypt_flag(password):
A = "utf-8"
flag = bytearray(base64.b64decode(FLAG))
buffer = (ctypes.c_byte * len(flag)).from_buffer(flag)
key = ctypes.create_string_buffer(password.encode(A))
result = get_helper().Decrypt(key, len(key) - 1, buffer, len(buffer))
return flag.decode(A)
def get_helper():
global HELPER
if HELPER:
return HELPER
data = globals().get("DATA")
if data:
dll_path = pathlib.Path(__file__).parent / "hello.bin"
if not dll_path.is_file():
with open(dll_path, "wb") as dll_file:
dll_file.write(zlib.decompress(base64.b64decode(data)))
HELPER = ctypes.cdll.LoadLibrary(dll_path)
else:
0
return HELPER
def check_three(password):
return check_ex(password, "Check3")
def check_four(password):
return check_ex(password, "Check4")
def check_ex(password, func):
GetIntCallbackFn = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_wchar_p)
class CallbackTable(ctypes.Structure):
_fields_ = [("E", GetIntCallbackFn)]
@GetIntCallbackFn
def eval_int(v):
return int(eval(v))
table = CallbackTable(E=eval_int)
helper = get_helper()
helper[func].argtypes = [ctypes.POINTER(CallbackTable)]
helper[func].restype = ctypes.c_int
return helper[func](ctypes.byref(table))
def check_two(password):
@ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int)
def callback(i):
return ord(password[i - 3]) + 3
return get_helper().Check2(callback)
def check_one(password):
if len(password) != 12:
return False
return get_helper().Check1(password) != 0
def check_password(password):
global PASSWORD
PASSWORD = password
checks = [check_one, check_two, check_three, check_four]
result = True
for check in checks:
result = result and check(password)
return result
def main():
parser = argparse.ArgumentParser(description="CTF Challenge")
parser.add_argument("password", help="Enter the password")
args = parser.parse_args()
if check_password(args.password):
flag = decrypt_flag(args.password)
print("Correct! The flag is DUCTF{%s}" % flag)
return 0
else:
print("That is not correct")
return 1
if __name__ == "__main__":
sys.exit(main())
When running this on Linux, I encountered an error due to an invalid ELF header. This suggests that hello.bin is not a native Linux binary, but rather a Windows DLL or some other non-ELF format. The script loads it using ctypes.cdll.LoadLibrary, which confirms it expects a shared library (DLL) whose exported functions it can call. This behavior is evident in the get_helper function.
The Role of get_helper in Library Loading
def get_helper():
global HELPER
if HELPER:
return HELPER
data = globals().get("DATA")
if data:
dll_path = pathlib.Path(__file__).parent / "hello.bin"
if not dll_path.is_file():
with open(dll_path, "wb") as dll_file:
dll_file.write(zlib.decompress(base64.b64decode(data)))
HELPER = ctypes.cdll.LoadLibrary(dll_path)
else:
0
return HELPER
This function performs three main operations:
Dynamic Extraction
- Uses a global HELPER variable to ensure the library is only loaded once
- The binary library is embedded in the script as base64-encoded, zlib-compressed data in the DATA variable
File Extraction
If hello.bin doesn’t exist, it:
- Base64 decodes the DATA string
- Decompresses it using zlib
- Writes the binary data to hello.bin
Library Loading
- Uses ctypes.cdll.LoadLibrary() to load the extracted binary as a shared library
- Returns the loaded library object for calling its functions
def check_password(password):
global PASSWORD
PASSWORD = password
checks = [check_one, check_two, check_three, check_four]
result = True
for check in checks:
result = result and check(password)
return result
def main():
parser = argparse.ArgumentParser(description="CTF Challenge")
parser.add_argument("password", help="Enter the password")
args = parser.parse_args()
if check_password(args.password):
flag = decrypt_flag(args.password)
print("Correct! The flag is DUCTF{%s}" % flag)
return 0
else:
print("That is not correct")
return 1
Once the library is loaded, the program validates the password through a sequence of checks: check_one, check_two, check_three, and check_four. If all these checks pass, the program proceeds to call the Decrypt function from the shared DLL.
The decryption function uses the RC4 algorithm, implemented as follows:
Decrypt :
/*
 * Decrypt - plain-C rendering of the DLL's Decrypt export (standard RC4).
 *
 * key/keylen : RC4 key bytes and their count (keylen must be > 0, since it
 *              is used as a modulus).
 * data/len   : buffer that is XORed in place with the RC4 keystream.
 *
 * Returns 0. The Python caller stores the result but never inspects it, and
 * the real DLL's return value was not recovered, so 0 is only a placeholder
 * that makes this int function well-defined.
 */
int Decrypt(uint8_t *key, int keylen, uint8_t *data, int len) {
    // RC4 is implemented using AVX SIMD instructions in the original code.
    // We will discuss how we recovered this implementation later, as it will be important for subsequent analysis.
    uint8_t s[256];
    int i, j = 0;

    // Key-scheduling algorithm: start from the identity permutation...
    for (i = 0; i < 256; i++)
        s[i] = i;
    // ...and shuffle it under control of the key.
    for (i = 0; i < 256; i++) {
        j = (j + s[i] + key[i % keylen]) % 256;
        uint8_t tmp = s[i];
        s[i] = s[j];
        s[j] = tmp;
    }

    // Keystream generation: one swap and one output byte per data byte.
    i = j = 0;
    for (int x = 0; x < len; x++) {
        i = (i + 1) % 256;
        j = (j + s[i]) % 256;
        uint8_t tmp = s[i];
        s[i] = s[j];
        s[j] = tmp;
        data[x] ^= s[(s[i] + s[j]) % 256];
    }
    return 0;
}
The Decrypt
function is invoked as:
result = get_helper().Decrypt(key, len(key) - 1, buffer, len(buffer));
This means that decryption is impossible without first recovering the correct key, making the validation checks critical for obtaining the final flag.
Let’s walk through each check step by step.
Check1 :
def check_one(password):
if len(password) != 12:
return False
return get_helper().Check1(password) != 0
It checks the password length (must equal 12), then calls Check1
from the loaded library from hello.bin
. Let’s examine it:
int64_t Check1(char* arg1)
{
char rdx = *(uint8_t*)arg1;
int64_t result;
result = (rdx ^ 0x43) == 0xb;
data_180009000 = rdx | 0x72;
return result;
}
It takes char rdx = *(uint8_t*)arg1, which reads the first byte of arg1 (i.e., arg1[0], which is password[0]) and stores it in rdx. The function returns 1 (true) if (rdx ^ 0x43) == 0x0b, which implies rdx == 0x0b ^ 0x43 = 0x48, i.e., 'H'.
Results
So, password[0] = 'H' (ASCII 0x48).
As a side effect, it also sets data_180009000 = 0x48 | 0x72 = 0x7a.
Check2 :
def check_two(password):
@ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int)
def callback(i):
return ord(password[i - 3]) + 3
return get_helper().Check2(callback)
uint64_t Check2(int64_t arg1)
{
int32_t rsi = 0;
char rbp = arg1(8) ^ data_180009000;
int32_t rbx;
rbx = rbp == 9;
rbp += arg1(9);
if (rbp == 0x74)
rsi = rbx;
data_180009001 = ~(rbp + 0x1e);
return (uint64_t)rsi;
}
Explanation
The function Check2
receives a callback (defined in Python) which returns ord(password[i - 3]) + 3
. This means:
callback(8) → ord(password[5]) + 3
callback(9) → ord(password[6]) + 3
It computes:
rbp = callback(8) ^ data_180009000
Since data_180009000 = 0x7a
, we want:
(ord(password[5]) + 3) ^ 0x7a == 9
→ ord(password[5]) + 3 = 0x7a ^ 0x09 = 0x73
→ ord(password[5]) = 0x70
→ password[5] = 'p'
Then it adds the result of callback(9)
:
rbp += ord(password[6]) + 3
To satisfy the condition rbp == 0x74
, we need:
9 + ord(password[6]) + 3 = 0x74
→ ord(password[6]) = 0x68
→ password[6] = 'h'
If both conditions are met:
rbp == 9 before the addition, and
rbp == 0x74 after the addition
Then rsi is set to 1, and the function returns 1.
Before returning, the function sets a global value:
data_180009001 = ~(rbp + 0x1e)
Since rbp == 0x74, this becomes:
data_180009001 = ~0x92 = 0xFFFFFFFFFFFFFF6D
But if data_180009001 is a one-byte variable (e.g., char), only the least significant byte is stored:
data_180009001 = 0x6D
// ASCII 'm'
Results
To pass check_two, the password must satisfy:
password[5] = 'p'
password[6] = 'h'
- Side effect: data_180009001 is set to 0x6D ('m')
So what we have so far is:
data_180009001 = 0x6D
data_180009000 = 0x7a
password[0] = 'H'
password[5] = 'p'
password[6] = 'h'
Check3 :
def check_three(password):
return check_ex(password, "Check3")
To understand check_three
, we need to understand check_ex
first:
def check_ex(password, func):
GetIntCallbackFn = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_wchar_p)
class CallbackTable(ctypes.Structure):
_fields_ = [("E", GetIntCallbackFn)]
@GetIntCallbackFn
def eval_int(v):
return int(eval(v))
table = CallbackTable(E=eval_int)
helper = get_helper()
helper[func].argtypes = [ctypes.POINTER(CallbackTable)]
helper[func].restype = ctypes.c_int
return helper[func](ctypes.byref(table))
We notice three main operations:
1. Creates Callback System
- Defines a function pointer type that takes a string and returns an integer
- Creates a structure to hold this callback function
2. Implements Evaluation Callback
eval_int(v)
receives string expressions from the DLL- Executes them as Python code using
eval()
- Returns the result as an integer
3. Calls DLL Function
- Passes the callback structure to the specified DLL function
- The DLL can now send Python expressions back to be evaluated
Let’s see how it’s called in the shared library:
int64_t check3(int64_t arg1) {
char buffer1[0x6c8];
int64_t cookie = __security_cookie ^ (int64_t)&buffer1;
// Initial data block setup
uint8_t data[16] = {0};
*(uint32_t*)(data + 4) = 0x530053; // 'S\0S'
*(uint16_t*)(data + 8) = 0x57; // 'W'
int i = 0;
uint16_t* out = (uint16_t*)data;
while (i < 8) {
uint8_t result = 0;
switch (i) {
case 0:
result = data[6] ^ 0x03;
break;
case 1:
result = data[0] ^ 0x11;
break;
case 5:
result = data[8] ^ 0x18;
break;
case 6:
result = data[10] ^ 0x1D;
break;
case 7:
result = data[12] ^ 0x16;
break;
}
if (result)
*out = (uint16_t)result;
out++;
i++;
}
// Prepare and parse input strings
char format[24] = "ord(%s[%d])";
char input[128] = {0};
char output[24] = {0};
uint16_t* resultData = (uint16_t*)output;
int index = 0;
while (index < 12) {
snprintf(input, sizeof(input), format, (char*)data, index);
resultData[index] = ((int (*)(char*))arg1)(input);
index++;
}
// Collect specific characters from buffer
char a = output[12];
char b = output[14];
char c = output[4];
char d = output[6];
// Prepare comparison format
char cmp_buffer[1024] = {0};
char extra[256] = {0};
snprintf(cmp_buffer, sizeof(cmp_buffer), "%d + 2 == %d and %d == %d and (...)", b);
for (int k = 0; k < 3; ++k) {
snprintf(extra, sizeof(extra), " and %d > 48 and %d < 57", ((char*)&a)[k], ((char*)&a)[k]);
strcat(cmp_buffer, extra);
}
int64_t result = ((int (*)(char*))arg1)(cmp_buffer);
__security_check_cookie(cookie ^ (int64_t)&buffer1);
return result;
}
It constructs something like:
"%d + 2 == %d and %d == %d and (%d > 48 and %d < 57) and %d > 48 and %d < 57"
and passes it to eval.
I dumped the v
value and passed unique characters as input to find the constraints. The v
was like:
108 + 2 == 105 and 101 == 108 and (105 - b) == 105 and 101 > 48 and 101 < 57 and 108 > 48 and 108 < 57 and 105 > 48 and 105 < 57
So I mapped them to positions and got:
Extracted Constraints
ord(password[8]) + 2 == ord(password[11])
ord(password[7]) == ord(password[8])
ord(password[11]) - eval(password[4]) == ord(password[11])  → eval(password[4]) must be 0, i.e. password[4] = '0'
50 < ord(password[11]) < 57
48 < ord(password[7]) < 57
48 < ord(password[8]) < 57
Check4 :
Overview
def check_four(password):
return check_ex(password, "Check4")
Calls check_ex with “Check4” as the function name and sets up a callback system that allows the native Check4 function in the DLL to execute Python expressions.
The arg1 parameter passed to Check4 is a pointer to a callback table containing the function eval_int, defined as:
def eval_int(v):
return int(eval(v))
This means any time the native code calls (*arg1)(value), it triggers a call to eval_int(value), effectively executing eval(value) in Python. For example, in the disassembly:
__builtin_wcscpy(dest: &var_270, src: u"ord(PASSWORD[1])")
...
char rax_2 = (*arg1)(&var_270)
The string “ord(PASSWORD[1])” is passed to the callback, resulting in a call to eval(“ord(PASSWORD[1])”), which returns the corresponding integer to the native code.
Let’s look at the check4
function:
here is the full function Check4.c
after variables setup
if (j_sub_180002060(&var_298, 0x1a, &s, &data_180009000, 2, var_2a0) == 0)
result = 0
We can see that j_sub_180002060
is called three times. What is it?
int64_t sub_180002060(int64_t arg1, int64_t arg2, char* arg3, int64_t arg4, int32_t arg5, int32_t arg6)
memcpy(dest: arg3, src: arg1, count: arg2.d)
j_sub_1800013f0(arg4, arg5, arg3, arg2.d) // this function jumps to sub_1800013f0 function
int32_t result
result.b = j_sub_180001630(arg3, arg2 u>> 1) == arg6
return result
It calls two functions:
1- sub_1800013f0
, which is a SIMD-optimized implementation of the RC4 encryption/decryption algorithm.
I cleaned and simplified it into a standard C version:
void rc4(uint8_t *key, int keylen, uint8_t *data, int len) {
uint8_t s[256];
int i, j = 0;
for (i = 0; i < 256; i++) s[i] = i;
for (i = 0; i < 256; i++) {
j = (j + s[i] + key[i % keylen]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
}
i = j = 0;
for (int x = 0; x < len; x++) {
i = (i + 1) % 256;
j = (j + s[i]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
data[x] ^= s[(s[i] + s[j]) % 256];
}
}
2- The second call is a hash check:
int32_t result = (sub_180001630(arg3, arg2 >> 1) == arg6);
Which corresponds to the following logic:
result.b = hash_utf16_string(dataa, dataLength u>> 1) == expected_hash;
And here is the cleaned hash implementation:
uint32_t hash_function(uint8_t *data, int len) {
uint32_t result = 0x1505;
for (int i = 0; i < len; i++) {
result = (result * 0x21) ^ data[i];
}
return result;
}
uint32_t hash_utf16_string(uint8_t *data, int len) {
int i;
for (i = 0; i + 1 < len; i += 2) {
if (data[i] == 0 && data[i + 1] == 0)
break;
}
return hash_function(data, i);
}
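This is essentially the XOR variant of the DJB2 hash (hash = hash * 33 ^ byte, seeded with 5381 = 0x1505), applied to the raw bytes of the UTF-16 string up to its NUL terminator, and used for hash verification after decryption.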
Putting it all together, the original function j_sub_180002060 can be understood and renamed as: DecryptRC4andCheckHash
int64_t DecryptRC4andCheckHash(int64_t data, int64_t dataLength, char* dataa, int64_t key, int32_t keyLength, int32_t expected_hash)
{
memcpy(dest: dataa, src: data, count: dataLength.d);
rc4(key, keyLength, dataa, dataLength.d);
int32_t result;
result.b = hash_utf16_string(dataa, dataLength u>> 1) == expected_hash;
return result;
}
Which is essentially:
void rc4(uint8_t *key, int keylen, uint8_t *data, int len) {
uint8_t s[256];
int i, j = 0;
for (i = 0; i < 256; i++) s[i] = i;
for (i = 0; i < 256; i++) {
j = (j + s[i] + key[i % keylen]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
}
i = j = 0;
for (int x = 0; x < len; x++) {
i = (i + 1) % 256;
j = (j + s[i]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
data[x] ^= s[(s[i] + s[j]) % 256];
}
}
uint32_t hash_function(uint8_t *data, int len) {
uint32_t result = 0x1505;
for (int i = 0; i < len; i++) {
result = (result * 0x21) ^ data[i];
}
return result;
}
uint32_t hash_utf16_string(uint8_t *data, int len) {
int i;
for (i = 0; i + 1 < len; i += 2) {
if (data[i] == 0 && data[i + 1] == 0)
break;
}
return hash_function(data, i);
}
memcpy(data, ciphertext, DATA_LEN);
rc4(key, 8, data, DATA_LEN);
uint32_t h = hash_utf16_string(data, DATA_LEN);
if (h == EXPECTED_HASH) {
return 1;
} else {
return 0;
}
This function simply:
Copies the encrypted data into a buffer.
Decrypts it using RC4 with a given key.
Computes a hash of the decrypted data and compares it to a known hash.
The function DecryptRC4AndCheckHash
is invoked three times, and in each call, it performs a stage of decryption.
To proceed through each stage successfully, we must recover the following for each:
key – the decryption key
key_length – the length of the key
enc_data – the encrypted data buffer
length_enc_data – the length of the encrypted data
Each stage uses this data in a DecryptRC4andCheckHash(...)
call to:
Decrypt the data using RC4,
And verify it using a DJB2-based hash function.
If the hash check passes, additional logic is executed . Otherwise, the function exits early.
So, in summary:
we must extract or reverse the correct (key, key_length, enc_data, length_enc_data) tuple for all three stages to reach the function’s final logic.
Stage One
__builtin_wcscpy(dest: &var_270, src: u"ord(PASSWORD[1])");
__builtin_wcscpy(dest: &var_248, src: u"ord(PASSWORD[2])");
__builtin_wcscpy(dest: &var_220, src: u"ord(PASSWORD[3])");
char ord1 = (*arg1)(&var_270);
char ord2 = (*arg1)(&var_248);
char ord3 = (*arg1)(&var_220);
In this stage, the function prepares the values needed for decryption:
- It loads a hardcoded 26-byte encrypted buffer (enc_data) into var_298.
- It sets the key pointer to &data_180009000 and the key length to 2.
- It initializes an empty buffer s to receive the decrypted output.
- It sets the expected hash constant to 0x6293def8.
if (_DecryptRC4andCheckHash(&var_298, 0x1a, &s, &data_180009000, 2, 0x6293def8) == 0)
We know the parameters:
Key: 2 bytes from
data_180009000
key = { 0x7a, 0x6d }  (data_180009000 = 0x7a from Check1, followed by data_180009001 = 0x6d from Check2)
Encrypted data: 26 bytes stored in
var_298
Expected hash:
0x6293def8
Here’s the equivalent C implementation:
/*
 * Stage-one harness: RC4-decrypts the 26-byte buffer with the two key bytes
 * left behind by Check1/Check2 and prints the plaintext when its hash
 * matches the constant checked by the DLL.
 *
 * Relies on DATA_LEN (26), rc4() and hash_utf16_string() defined alongside.
 * Returns 0 when the hash matches, 1 otherwise.
 */
int main() {
    uint8_t ciphertext[DATA_LEN] = {
        0xf2, 0x1e, 0x2a, 0xf4, 0x21, 0xef, 0xf7, 0x29, 0x1b, 0x8b,
        0x96, 0x17, 0x78, 0x8b, 0x32, 0x90, 0x87, 0xb4, 0x58, 0xb5,
        0xe1, 0xed, 0xb9, 0x48, 0x3e, 0xd9
    };
    /*
     * The key is read from &data_180009000 with length 2, i.e. in memory
     * order: data_180009000 (0x7a, written by Check1) comes first, then
     * data_180009001 (0x6d, written by Check2). This matches the first two
     * bytes of the 8-byte key used by the later stages.
     */
    uint8_t key[2] = {0x7a, 0x6d};
    uint8_t data[DATA_LEN];

    memcpy(data, ciphertext, DATA_LEN);
    rc4(key, 2, data, DATA_LEN);

    uint32_t h = hash_utf16_string(data, DATA_LEN);
    if (h == 0x6293def8) {
        printf("[+] Decrypted data: ");
        /* The plaintext is a UTF-16LE string of ASCII characters, so only
         * the low byte of each 2-byte unit is printable text. */
        for (int i = 0; i + 1 < DATA_LEN; i += 2) putchar(data[i]);
        printf("\n");
        return 0;
    }
    return 1;
}
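The decrypted data is the UTF-16 string int(KEY[0:4]) — a Python expression that the DLL hands back to the eval callback. KEY is the hex string defined at the top of bilingual.py ("68592cb9..."), so KEY[0:4] is the text "6859" and the expression evaluates to the integer 6859.
Result:
Decrypted value: the expression int(KEY[0:4]), which evaluates to 6859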
Stage Two
If the check from stage one passes, it evaluates (*arg1)(&ord9), which is eval("int(KEY[0:4])") and returns 6859:
int32_t rax_3 = (*arg1)(&ord9); // eval(int(KEY[0:4]))
data_180009004 = ord1;
data_180009005 = ord2;
Then it prepares decryption-related values:
var_298 = 0x5ac1e9d0;
data_180009003 = (rax_3 >> 3) ^ 0x36;
data_180009006 = ord3 ^ ord1 ^ ord2 ^ 0x10;
Static values written to the stack:
int32_t var_294_1 = 0x31280c9e;
__builtin_strncpy(&var_290, "X$]h", 4);
__builtin_memcpy(&var_28c,
"\x54\x8d\x6f\xe7\xf6\xdb\xd7\xe5\xc0\x4b\x28\x46"
"\xe7\xa4\x7e\xcd\x07\xf8\xf4\x41", 0x14);
The variable var_f8 is cleared with a memset, and then passed as the output buffer for decryption:
memset(&var_f8, 0, 0xc0); // clear buffer
if (_DecryptRC4andCheckHash(&var_298, 0x20, &var_f8, &data_180009000, 8, 0x69fa99d) == 0)
But from the disassembly, we realize the actual ciphertext is stored contiguously on the stack, starting at rsp+0x30; the separate var_2xx locals shown by the decompiler are just pieces of that single 0x20-byte buffer. The full encrypted payload is loaded as follows:
mov dword [rsp+0x30], 0x5ac1e9d0
mov dword [rsp+0x34], 0x31280c9e
mov dword [rsp+0x38], 0x685d2458
mov dword [rsp+0x3c], 0xe76f8d54
mov dword [rsp+0x40], 0xe5d7dbf6
mov dword [rsp+0x44], 0x46284bc0
mov dword [rsp+0x48], 0xcd7ea4e7
mov dword [rsp+0x4c], 0x41f4f807
Putting it all together, the encrypted data is:
uint8_t ciphertext[32] = {
0xd0, 0xe9, 0xc1, 0x5a,
0x9e, 0x0c, 0x28, 0x31,
0x58, 0x24, 0x5d, 0x68,
0x54, 0x8d, 0x6f, 0xe7,
0xf6, 0xdb, 0xd7, 0xe5,
0xc0, 0x4b, 0x28, 0x46,
0xe7, 0xa4, 0x7e, 0xcd,
0x07, 0xf8, 0xf4, 0x41
};
The RC4 key has a length of 8 bytes. It is constructed as follows:
data_180009000 = 0x7a
data_180009001 = 0x6d
data_180009002 = 0xcc
data_180009003 = (key0to4 >> 3) ^ 0x36
data_180009004 = ord1
data_180009005 = ord2
data_180009006 = ord3 ^ ord1 ^ ord2 ^ 0x10
data_180009007 = 0xcc
We’ll brute-force the three unknown printable bytes: ord1, ord2, and ord3.
ord1, ord2, and ord3 are constrained to printable ASCII (0x20 to 0x7E).
This gives us 95 options per byte:
ord1 = 0x20 .. 0x7E
ord2 = 0x20 .. 0x7E
ord3 = 0x20 .. 0x7E
Total combinations:
95 × 95 × 95 = 857,375
Each candidate key is tested by RC4-decrypting the stage-two ciphertext and comparing its hash against 0x69fa99d. This is small enough to brute-force efficiently, and this is the result:
uint8_t key[8] = { 0x7a, 0x6d, 0xcc, 0x6f, 0x79, 0x64, 0x7f, 0xcc };
So we know:
password[0] = 0x48 # 'H'
password[1] = 0x79 # 'y'
password[2] = 0x64 # 'd'
password[3] = 0x7f ^ 0x64 ^ 0x79 ^ 0x10 = 0x72 # 'r'
password[4] = '0'
password[5] = 'p'
password[6] = 'h'
password[7] = password[11] - 2 # and in range '1'...'6'
password[8] = password[7]
password[9] = unknown
password[10] = unknown
password[11] = in range '3' to '8' # strict bounds: 50 < ord(password[11]) < 57
While brute-forcing the two bytes at indices 9 and 10 could have revealed the flag, I chose to fully analyze and complete the challenge for a deeper understanding
After decryption, we observe that the stage-two plaintext is the UTF-16 string ord(PASSWORD[9]).
Then, the following logic is executed:
if (!RC4decryptCheckHash(&enc_data1, 0x20, &ord9, &data_180009000, 8, 0x69fa99d))
result = 0;
else
{
int16_t ordd9 = (*(uint64_t*)arg1)(&ord9); // result of eval(ord(PASSWORD[9]))
int112_t var_1e8;
__builtin_wcscpy(&var_1e8, L"11:13"); // Unicode string "11:13"
if (((key0to4 & 0x64) ^ (uint32_t)ordd9) != (*(uint64_t*)arg1)(&key0to4int))
result = 0;
This part of the code was initially misunderstood during static analysis, so we took another look at the assembly:
lea rcx, [rbp+0xd0] ; RCX = &ord9
call [rsi] ; eax = eval(&ord9)
mov ebx, eax ; store result in ebx → this is ordd9
then:
lea rcx, [rbp-0x30] ; RCX = &key0to4int
mov dword [rbp-0x20], 0x310031 ; Unicode '1'
mov dword [rbp-0x1c], 0x31003a ; Unicode ':', '1'
mov word [rbp-0x18], r13w ; Unicode '3'
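This shows that the code overwrites the 0:4 slice inside the int(KEY[0:4]) buffer with the wide string “11:13”, meaning it’s evaluating int(KEY[11:13]). With KEY = "68592cb91784620b...", KEY[11:13] is "46", so this expression evaluates to 46.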
The comparison performed is:
if (((key0to4 & 0x64) ^ ordd9) != eval("int(KEY[11:13])"))
Solving the equation:
ordd9 = (6859 & 0x64) ^ 46 = 110
Which gives:
ord9 = 0x6e // 'n' PASSWORD[9]='n'
Only one byte is left now. We could brute-force it, but let’s stay true to the spirit of the challenge and finish it analytically.
Stage Three
In the same way, I extracted the encrypted data:
ciphertext[DATA_LEN] = {
0xd6,0xe9,0xdd,0x5a,0x8e,0x0c,0x28,0x31,0x43,0x24,0x59,0x68,0x5e,0x8d,
0x67,0xe7,0x91,0xdb,0xa2,0xe5,0xa0,0x4b,0x31,0x46,0x90,0xa4,0x67,0xcd,
0x6b,0xf8,0xeb,0x41,0x20,0x94
};
Using the same key:
uint8_t key[8] = { 0x7a, 0x6d, 0xcc, 0x6f, 0x79, 0x64, 0x7f, 0xcc };
This decrypted to int(KEY[0:2], 16).
In this part, I dumped the last eval and saw ord(PASSWORD[10]) instead of ord(PASSWORD[9]).
result_1 = (uint32_t)orddd9 == (*(uint64_t*)arg1)(&key02) - 7;
So finally ord(PASSWORD[10]) == eval("int(KEY[0:2], 16)") - 7. Since KEY[0:2] is "68", int("68", 16) = 0x68 = 104, which gives ord(PASSWORD[10]) = 104 - 7 = 97 (‘a’).
Deduced Password Characters
password[0] = 0x48; // 'H'
password[1] = 0x79; // 'y'
password[2] = 0x64; // 'd'
password[3] = 0x7f ^ 0x64 ^ 0x79 ^ 0x10; // = 0x72 → 'r'
password[4] = 0x30; // '0'
password[5] = 0x70; // 'p'
password[6] = 0x68; // 'h'
password[7] = password[11] - 2; // '1' to '6'
password[8] = password[7]; // same as password[7]
password[9] = 0x6e; // 'n'
password[10] = 0x61; // 'a'
password[11] = 0x33 to 0x38; // '3' to '8' (Check3 requires 50 < ord < 57)
And the cleaned version of the Check4 function is:
#include <stdio.h>
#include <stdint.h>
#include <string.h>
void rc4(uint8_t *key, int keylen, uint8_t *data, int len) {
uint8_t s[256];
int i, j = 0;
for (i = 0; i < 256; i++) s[i] = i;
for (i = 0; i < 256; i++) {
j = (j + s[i] + key[i % keylen]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
}
i = j = 0;
for (int x = 0; x < len; x++) {
i = (i + 1) % 256;
j = (j + s[i]) % 256;
uint8_t tmp = s[i]; s[i] = s[j]; s[j] = tmp;
data[x] ^= s[(s[i] + s[j]) % 256];
}
}
uint32_t hash_function(uint8_t *data, int len) {
uint32_t result = 0x1505;
for (int i = 0; i < len; i++) {
result = (result * 0x21) ^ data[i];
}
return result;
}
uint32_t hash_utf16_string(uint8_t *data, int len) {
int i;
for (i = 0; i + 1 < len; i += 2) {
if (data[i] == 0 && data[i + 1] == 0)
break;
}
return hash_function(data, i);
}
/*
 * RC4decryptCheckHash - model of the DLL helper sub_180002060.
 *
 * enc_data/enc_len : ciphertext and its length in bytes.
 * key              : despite its name, this is the caller's OUTPUT buffer;
 *                    the plaintext is left here so the caller can pass it
 *                    to the eval callback. Must hold at least enc_len bytes.
 * data_base/key_len: the actual RC4 key bytes (the data_180009000 region)
 *                    and how many of them to use.
 * expected_hash    : hash the plaintext must produce.
 *
 * Returns 1 when the hash of the plaintext equals expected_hash, else 0.
 *
 * The helper copies the ciphertext into the output buffer, RC4-decrypts it
 * in place with the given key, then hashes the result. It does not build a
 * combined key and does not decrypt into a private scratch buffer.
 */
int RC4decryptCheckHash(uint8_t *enc_data, int enc_len, uint8_t *key, uint8_t *data_base, int key_len, uint32_t expected_hash) {
    uint8_t *out = key;  /* clearer local name for the output buffer */

    memcpy(out, enc_data, enc_len);
    rc4(data_base, key_len, out, enc_len);

    /* hash_utf16_string here takes a byte count and stops at the first
     * UTF-16 NUL; the DLL passes length >> 1 to its own (presumably
     * wchar-indexed) routine -- NOTE(review): confirm against the binary. */
    uint32_t h = hash_utf16_string(out, enc_len);
    return h == expected_hash ? 1 : 0;
}
/*
 * Check4 - cleaned-up model of the DLL's fourth check.
 *
 * arg1 points at the callback table whose first entry evaluates a Python
 * expression string and returns the result as an int. The calls below keep
 * the write-up's shorthand `(*(uint64_t*)arg1)(expr)` for "call that
 * callback"; it is notation, not compilable C.
 *
 * Three RC4-encrypted expression strings are decrypted in turn, each gated
 * by a hash check. Returns 1 when every stage passes, 0 otherwise.
 */
uint64_t Check4(int64_t arg1) {
    /*
     * The 8-byte RC4 key lives at data_180009000..data_180009007. It is kept
     * as ONE array and updated in place, so that the bytes derived after
     * stage one are actually part of the key used by stages two and three.
     */
    uint8_t data_base[8] = {
        0x7a,   /* data_180009000: left by Check1 (0x48 | 0x72)      */
        0x6d,   /* data_180009001: left by Check2 (~(0x74 + 0x1e))   */
        0xcc,   /* data_180009002                                    */
        0x00,   /* data_180009003: derived after stage one           */
        0x00,   /* data_180009004: ord(PASSWORD[1])                  */
        0x00,   /* data_180009005: ord(PASSWORD[2])                  */
        0x00,   /* data_180009006: ord3 ^ ord1 ^ ord2 ^ 0x10         */
        0xcc    /* data_180009007                                    */
    };

    char ord1 = (*(uint64_t*)arg1)("ord(PASSWORD[1])");
    char ord2 = (*(uint64_t*)arg1)("ord(PASSWORD[2])");
    char ord3 = (*(uint64_t*)arg1)("ord(PASSWORD[3])");

    /* ---- Stage one: 26-byte ciphertext, 2-byte key -> "int(KEY[0:4])" ---- */
    uint32_t expected_hash1 = 0x6293def8;
    uint8_t enc_data1[] = {0xf2, 0x1e, 0x2a, 0xf4, 0x21, 0xef, 0xf7, 0x29, 0x1b, 0x8b,
                           0x96, 0x17, 0x78, 0x8b, 0x32, 0x90, 0x87, 0xb4, 0x58, 0xb5,
                           0xe1, 0xed, 0xb9, 0x48, 0x3e, 0xd9};
    uint8_t key0to4int[30];
    memset(key0to4int, 0, 30);
    if (!RC4decryptCheckHash(enc_data1, 0x1a, key0to4int, data_base, 2, expected_hash1)) {
        return 0;
    }
    int32_t key0to4 = (*(uint64_t*)arg1)(key0to4int);

    /* Fill in the password-dependent key bytes before stage two. */
    data_base[4] = ord1;
    data_base[5] = ord2;
    data_base[3] = (int8_t)(key0to4 >> 3) ^ 0x36;
    data_base[6] = ord3 ^ ord1 ^ ord2 ^ 0x10;

    /* ---- Stage two: 32-byte ciphertext, 8-byte key -> "ord(PASSWORD[9])" ---- */
    uint8_t enc_data2[] = {0xd0,0xe9,0xc1,0x5a,0x9e,0x0c,0x28,0x31,0x58,0x24,0x5d,0x68,0x54,0x8d,0x6f,0xe7,
                           0xf6,0xdb,0xd7,0xe5,0xc0,0x4b,0x28,0x46,0xe7,0xa4,0x7e,0xcd,0x07,0xf8,0xf4,0x41};
    uint8_t ord9[192];
    memset(ord9, 0, 192);
    if (!RC4decryptCheckHash(enc_data2, 0x20, ord9, data_base, 8, 0x69fa99d)) {
        return 0;
    }
    int16_t ordd9 = (*(uint64_t*)arg1)(ord9);
    /* The DLL patches "0:4" to "11:13" inside the stage-one string and
     * evaluates that, hence the literal expression here. */
    if (((key0to4 & 0x64) ^ (uint32_t)ordd9) != (*(uint64_t*)arg1)("int(KEY[11:13])")) {
        return 0;
    }

    /* The final comparison uses PASSWORD[10], per the eval dump.
     * NOTE(review): how the DLL builds this string was not recovered;
     * a literal is used here. */
    int16_t ord10 = (*(uint64_t*)arg1)("ord(PASSWORD[10])");

    /* ---- Stage three: 34-byte ciphertext, same key -> "int(KEY[0:2], 16)" ---- */
    uint32_t var_2a0_1 = 0xa7d53695;
    uint8_t enc_data3[] = {0xd6,0xe9,0xdd,0x5a,0x8e,0x0c,0x28,0x31,0x43,0x24,0x59,0x68,0x5e,0x8d,0x67,0xe7,
                           0x91,0xdb,0xa2,0xe5,0xa0,0x4b,0x31,0x46,0x90,0xa4,0x67,0xcd,0x6b,0xf8,0xeb,0x41,0x20,0x94};
    uint8_t key02[128];
    memset(key02, 0, 128);
    if (!RC4decryptCheckHash(enc_data3, 0x22, key02, data_base, 8, var_2a0_1)) {
        return 0;
    }
    /* key02 holds the decrypted expression, which is what gets evaluated. */
    int32_t result = (uint32_t)ord10 == (*(uint64_t*)arg1)(key02) - 7;
    return (uint64_t)result;
}
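This leaves a short list of candidate passwords. The list below enumerates all eight digit pairs; note that the strict bounds from Check3 (48 < ord < 57 for positions 7, 8 and 11) rule out the first and the last entry, leaving six real candidates: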
Hydr0ph00na2
Hydr0ph11na3
Hydr0ph22na4
Hydr0ph33na5
Hydr0ph44na6
Hydr0ph55na7
Hydr0ph66na8
Hydr0ph77na9
And Hydr0ph11na3
was the correct one:
Decrypting the flag
we can use a simple Python script to decrypt the RC4-encrypted base64 string:
import base64
import itertools
import os
FLAG = "jqsD0um75+TyJR3z0GbHwBQ+PLIdSJ+rojVscEL4IYkCOZ6+a5H1duhcq+Ub9Oa+ZWKuL703"
flag_bytes = base64.b64decode(FLAG)
def rc4(key, data):
    """Encrypt or decrypt *data* with RC4 under *key*.

    RC4 is symmetric, so the same call performs both directions.

    Args:
        key: The key, either as ``str`` (encoded as UTF-8, the same way
            ``decrypt_flag`` in bilingual.py encodes the password before
            handing it to the DLL) or as a bytes-like object. Must be
            non-empty.
        data: Iterable of byte values (e.g. ``bytes``) to transform.

    Returns:
        The transformed data as ``bytes``.
    """
    key_bytes = key.encode("utf-8") if isinstance(key, str) else bytes(key)

    # Key-scheduling: shuffle the identity permutation under the key.
    S = list(range(256))
    j = 0
    for i in range(256):
        j = (j + S[i] + key_bytes[i % len(key_bytes)]) % 256
        S[i], S[j] = S[j], S[i]

    # Keystream generation, XORed onto the input one byte at a time.
    i = j = 0
    out = bytearray()
    for byte in data:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        out.append(byte ^ S[(S[i] + S[j]) % 256])
    return bytes(out)
print(rc4("Hydr0ph11na3",flag_bytes))
Output:
b'the_problem_with_dynamic_languages_is_you_cant_c_types'
or simply pass it into bilingual.py
:
PS E:\projects\bilingual> python new.py Hydr0ph11na3
Correct! The flag is DUCTF{the_problem_with_dynamic_languages_is_you_cant_c_types}
PS E:\projects\bilingual>
Thanks to the DownUnderCTF organizers and challenge authors for this nice challenge.
Regards,FozzieBear (cybears)