Skip to content

Commit 5ce7e23

Browse files
authored
[4.2.8] Multiple APK Analysis improvements, general Code QA & bug fixes (#2470)
* Dockerfile QA
* Add sdk-build-tools to Docker image
* Replace biplist with plistlib std lib
* Fixed a bug in iOS pbxproj parsing
* Added support for APK parsing with aapt2/aapt
* Use aapt/aapt2 as a fallback for APK parsing, files listing and string extraction
* Added "started at" to Scan task queue model #2463
* Tasks List API to return string status #2464
* Replaced all minidom calls with defusedxml.minidom
* Code QA on android manifest data extraction and parsing
* Improved android file analysis
* Improved android manifest data extraction
* Improved android icon file extraction
* Improved android app name extraction
* Improved android appstore package details extraction
* Android string extraction to fallback on aapt2 strings
* APK analysis arguments refactor
* Handle packed APKs, refactor unzip to handle malformed APK files
* Handle reserved filename conflict during ZIP extraction
* Explicit Zipslip handling during ZIP extraction
* Graceful files extraction on unzip failure
* Removed bail out and continue analysis
* Moved androguard parsing to the start of static analysis
* AndroidManifest.xml fallback from apktool to androguard during extraction and parsing
* Updated Tasks UI to show started at
1 parent a015df5 commit 5ce7e23

40 files changed

+1029
-598
lines changed

Dockerfile

+1-4
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@ ENV DEBIAN_FRONTEND=noninteractive \
2020
USER_ID=9901 \
2121
MOBSF_PLATFORM=docker \
2222
MOBSF_ADB_BINARY=/usr/bin/adb \
23-
JDK_FILE=openjdk-22.0.2_linux-x64_bin.tar.gz \
24-
JDK_FILE_ARM=openjdk-22.0.2_linux-aarch64_bin.tar.gz \
25-
WKH_FILE=wkhtmltox_0.12.6.1-3.bookworm_amd64.deb \
26-
WKH_FILE_ARM=wkhtmltox_0.12.6.1-3.bookworm_arm64.deb \
2723
JAVA_HOME=/jdk-22.0.2 \
2824
PATH=/jdk-22.0.2/bin:/root/.local/bin:$PATH \
2925
DJANGO_SUPERUSER_USERNAME=mobsf \
@@ -32,6 +28,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
3228
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#run
3329
RUN apt update -y && \
3430
apt install -y --no-install-recommends \
31+
android-sdk-build-tools \
3532
android-tools-adb \
3633
build-essential \
3734
curl \

mobsf/DynamicAnalyzer/views/common/device.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
read_sqlite,
2020
)
2121

22-
from biplist import (
23-
writePlistToString,
22+
from plistlib import (
23+
FMT_XML,
24+
dumps,
2425
)
2526

26-
2727
logger = logging.getLogger(__name__)
2828

2929

@@ -57,7 +57,7 @@ def view_file(request, api=False):
5757
return print_n_send_error_response(request, err, api)
5858
dat = sfile.read_text('ISO-8859-1')
5959
if fil.endswith('.plist') and dat.startswith('bplist0'):
60-
dat = writePlistToString(dat).decode('utf-8', 'ignore')
60+
dat = dumps(dat, fmt=FMT_XML).decode('utf-8', 'ignore')
6161
if fil.endswith(('.xml', '.plist')) and typ in ['xml', 'plist']:
6262
rtyp = 'xml'
6363
elif typ == 'db':

mobsf/MobSF/init.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
logger = logging.getLogger(__name__)
2020

21-
VERSION = '4.2.7'
21+
VERSION = '4.2.8'
2222
BANNER = r"""
2323
__ __ _ ____ _____ _ _ ____
2424
| \/ | ___ | |__/ ___|| ___|_ _| || | |___ \

mobsf/MobSF/security.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
import sys
77
from shutil import which
88
from pathlib import Path
9+
from platform import system
910
from concurrent.futures import ThreadPoolExecutor
1011

11-
1212
from mobsf.MobSF.utils import (
13+
find_aapt,
1314
find_java_binary,
1415
gen_sha256_hash,
1516
get_adb,
@@ -72,9 +73,21 @@ def get_executable_hashes():
7273
downloaded_tools,
7374
manage_py,
7475
]
76+
aapt = 'aapt'
77+
aapt2 = 'aapt2'
78+
if system() == 'Windows':
79+
aapt = 'aapt.exe'
80+
aapt2 = 'aapt2.exe'
81+
aapts = [find_aapt(aapt), find_aapt(aapt2)]
82+
exec_loc.extend(Path(a) for a in aapts if a)
7583
# External binaries used directly by MobSF
7684
system_bins = [
85+
'aapt',
86+
'aapt.exe',
87+
'aapt2',
88+
'aapt2.exe',
7789
'adb',
90+
'adb.exe',
7891
'which',
7992
'wkhtmltopdf',
8093
'httptools',
@@ -110,6 +123,8 @@ def get_executable_hashes():
110123
settings.CLASSDUMP_BINARY,
111124
settings.CLASSDUMP_SWIFT_BINARY,
112125
getattr(settings, 'BUNDLE_TOOL', ''),
126+
getattr(settings, 'AAPT2_BINARY', ''),
127+
getattr(settings, 'AAPT_BINARY', ''),
113128
]
114129
for ubin in user_defined_bins:
115130
if ubin:
@@ -222,3 +237,15 @@ def sanitize_filename(filename):
222237
# Remove leading and trailing underscores
223238
safe_filename = safe_filename.strip('_')
224239
return safe_filename
240+
241+
242+
def sanitize_for_logging(filename: str, max_length: int = 255) -> str:
    """Sanitize a filename to prevent log injection.

    Every character outside ``[a-zA-Z0-9._-]`` (which includes newline,
    carriage return and tab) is replaced with an underscore, then the
    result is truncated to ``max_length`` characters.

    Args:
        filename: Value to sanitize. Non-string values (for example a
            ``pathlib.Path`` or ``None``) are converted with ``str()``
            first so that a logging helper never raises on odd input.
        max_length: Maximum number of characters to keep.

    Returns:
        The sanitized, truncated string.
    """
    # A logging helper should not crash the caller on non-str input.
    if not isinstance(filename, str):
        filename = str(filename)

    # Allow only safe characters (alphanumeric, underscore, dash, period).
    # Control characters such as \n, \r and \t fall outside this set, so
    # no separate replacement pass is needed for them.
    filename = re.sub(r'[^a-zA-Z0-9._-]', '_', filename)

    # Truncate filename to the maximum allowed length
    return filename[:max_length]

mobsf/MobSF/settings.py

+2
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@
462462
VD2SVG_BINARY = os.getenv('MOBSF_VD2SVG_BINARY', '')
463463
APKTOOL_BINARY = os.getenv('MOBSF_APKTOOL_BINARY', '')
464464
ADB_BINARY = os.getenv('MOBSF_ADB_BINARY', '')
465+
AAPT2_BINARY = os.getenv('MOBSF_AAPT2_BINARY', '')
466+
AAPT_BINARY = os.getenv('MOBSF_AAPT_BINARY', '')
465467

466468
# iOS 3P Tools
467469
JTOOL_BINARY = os.getenv('MOBSF_JTOOL_BINARY', '')

mobsf/MobSF/utils.py

+29
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,32 @@ def find_java_binary():
184184
return 'java'
185185

186186

187+
def find_aapt(tool_name):
    """Find the specified tool (aapt or aapt2).

    The system PATH is searched first. If the tool is not there, the
    ``build-tools`` directory of the usual per-user Android SDK locations
    is scanned, newest build-tools version first.

    Args:
        tool_name: Executable file name to look for, e.g. ``aapt2`` or
            ``aapt2.exe``.

    Returns:
        The path of the tool as a string, or ``None`` when it cannot be
        found.
    """
    # Check system PATH for the tool
    tool_path = shutil.which(tool_name)
    if tool_path:
        return tool_path

    def _version_key(entry):
        # Compare dotted components numerically so that 34.0.0 sorts above
        # 9.0.0; a plain string sort would rank '9...' highest. Non-numeric
        # components (e.g. '0-rc1') sort below numeric ones.
        return [
            (1, int(part), '') if part.isdecimal() else (0, 0, part)
            for part in entry.name.split('.')
        ]

    # Check common Android SDK locations
    home_dir = Path.home()
    sdk_paths = [
        home_dir / 'Library' / 'Android' / 'sdk',  # macOS
        home_dir / 'Android' / 'Sdk',  # Linux
        home_dir / 'AppData' / 'Local' / 'Android' / 'Sdk',  # Windows
    ]

    for sdk_path in sdk_paths:
        build_tools_path = sdk_path / 'build-tools'
        # is_dir() rather than exists(): iterdir() fails on a regular file.
        if not build_tools_path.is_dir():
            continue
        versions = sorted(
            build_tools_path.iterdir(), key=_version_key, reverse=True)
        for version in versions:
            candidate = version / tool_name
            if candidate.exists():
                return str(candidate)

    return None
211+
212+
187213
def print_n_send_error_response(request,
188214
msg,
189215
api=False,
@@ -667,6 +693,8 @@ def common_check(instance_id):
667693

668694
def is_path_traversal(user_input):
669695
"""Check for path traversal."""
696+
if not user_input:
697+
return False
670698
if (('../' in user_input)
671699
or ('%2e%2e' in user_input)
672700
or ('..' in user_input)
@@ -836,6 +864,7 @@ def get_android_dm_exception_msg():
836864

837865
def get_android_src_dir(app_dir, typ):
838866
"""Get Android source code location."""
867+
src = None
839868
if typ == 'apk':
840869
src = app_dir / 'java_source'
841870
elif typ == 'studio':

mobsf/StaticAnalyzer/models.py

+1
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ class EnqueuedTask(models.Model):
177177
file_name = models.CharField(max_length=255)
178178
created_at = models.DateTimeField(default=timezone.now)
179179
status = models.CharField(max_length=255, default='Enqueued')
180+
started_at = models.DateTimeField(null=True)
180181
completed_at = models.DateTimeField(null=True)
181182
app_name = models.CharField(max_length=255, default='')
182183

mobsf/StaticAnalyzer/tools/androguard4/resources/public.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# -*- coding: utf_8 -*-
22
# flake8: noqa
33
import os
4-
from xml.dom import minidom
4+
5+
from defusedxml.minidom import parseString
56

67
_public_res = None
78
# copy the newest sdk/platforms/android-?/data/res/values/public.xml here
@@ -11,7 +12,7 @@
1112
xmlfile = os.path.join(root, "public.xml")
1213
if os.path.isfile(xmlfile):
1314
with open(xmlfile, "r") as fp:
14-
_xml = minidom.parseString(fp.read())
15+
_xml = parseString(fp.read())
1516
for element in _xml.getElementsByTagName("public"):
1617
_type = element.getAttribute('type')
1718
_name = element.getAttribute('name')
+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# -*- coding: utf_8 -*-
2+
"""Use aapt2 to extract APK features."""
3+
import re
4+
import logging
5+
import subprocess
6+
from platform import system
7+
from pathlib import Path
8+
9+
from django.conf import settings
10+
11+
from mobsf.MobSF.utils import (
12+
find_aapt,
13+
)
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class AndroidAAPT:
    """Wrapper around the aapt/aapt2 command line tools for one APK.

    ``aapt`` is used to list the files in the APK; ``aapt2`` is used to
    dump its string pool and its badging information. Parsed badging
    values are accumulated in ``self.data``.
    """

    def __init__(self, apk_path):
        """Locate the aapt/aapt2 binaries and initialise the result dict.

        Args:
            apk_path: Path of the APK handed to the aapt/aapt2 commands.

        Raises:
            FileNotFoundError: If aapt or aapt2 cannot be located.
        """
        self.aapt2_path = None
        self.aapt_path = None
        self.apk_path = apk_path
        self.data = {
            'permissions': [],
            'uses_features': {},
            'package': None,
            'application_label': None,
            'application_icon': None,
            'launchable_activity': None,
            'min_sdk_version': None,
            'target_sdk_version': None,
        }

        # A custom path from settings wins; otherwise search for the tool.
        self.aapt2_path = self._locate_binary('AAPT2_BINARY', 'aapt2')
        self.aapt_path = self._locate_binary('AAPT_BINARY', 'aapt')

        # Ensure both aapt and aapt2 are found
        if not (self.aapt2_path and self.aapt_path):
            raise FileNotFoundError('aapt and/or aapt2 not found')

    @staticmethod
    def _locate_binary(setting_name, tool_name):
        """Return the configured path for a tool, else search for it.

        Args:
            setting_name: Name of the Django setting holding a custom path.
            tool_name: Base executable name without the ``.exe`` suffix.

        Returns:
            The configured path when it is set and exists, otherwise the
            result of ``find_aapt`` (which may be ``None``).
        """
        custom = getattr(settings, setting_name, '')
        if custom and Path(custom).exists():
            return custom
        if system() == 'Windows':
            tool_name = f'{tool_name}.exe'
        return find_aapt(tool_name)

    def _execute_command(self, args):
        """Run a command and return its combined stdout/stderr as text.

        Args:
            args: Argument list passed to ``subprocess.check_output``.

        Returns:
            The decoded output, or ``None`` when the command exits with a
            non-zero status or cannot be started.
        """
        try:
            out = subprocess.check_output(
                args,
                stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            logger.warning(e.output)
            return None
        except OSError as e:
            # The binary vanished or is not executable: treat it like any
            # other command failure so the public getters return an empty
            # result instead of raising.
            logger.warning('Failed to execute %s: %s', args, e)
            return None
        return out.decode('utf-8', errors='ignore')

    def _get_strings(self, output):
        """Extract string values from ``aapt2 dump strings`` output.

        NOTE(review): the character class stops at the first ``/`` or
        newline, so a value containing a slash is captured only up to that
        slash rather than skipped — confirm this is the intended way of
        ignoring paths.

        Args:
            output: Text produced by the dump command.

        Returns:
            A list of the captured strings with surrounding whitespace
            removed.
        """
        pattern = r'String #[\d]+ : ([^\/\n]+)'
        return [found.strip() for found in re.findall(pattern, output)]

    def _parse_badging(self, output):
        """Parse ``dump badging`` output into ``self.data``.

        Only keys that are present in ``output`` overwrite the existing
        values, except ``uses_features`` which is always replaced.

        Args:
            output: Text produced by the badging dump.

        Returns:
            ``self.data`` after it has been updated.
        """
        # (result key, pattern) pairs whose first capture group is stored.
        single_value_patterns = (
            ('package', r'package: name=\'([\w\.]+)\''),
            # Classic aapt output spells the minimum SDK key `sdkVersion:`;
            # accept either spelling. `\b` plus the lowercase `s` keeps
            # `targetSdkVersion:` from matching here.
            ('min_sdk_version',
             r'(?:minSdkVersion|\bsdkVersion):\'(\d+)\''),
            ('target_sdk_version', r'targetSdkVersion:\'(\d+)\''),
            ('application_label',
             r'application-label(?:-[\w\-]+)?:\'([^\']+)\''),
            ('application_icon', r'application:.*icon=\'([^\']+)\''),
            ('launchable_activity',
             r'launchable-activity: name=\'([\w\.]+)\''),
        )
        for key, pattern in single_value_patterns:
            found = re.search(pattern, output)
            if found:
                self.data[key] = found.group(1)

        # Match permissions
        permissions = re.findall(
            r'uses-permission: name=\'([\w\.]+)\'', output)
        if permissions:
            self.data['permissions'] = permissions

        # Match used features. The type is one of 'uses-feature',
        # 'uses-feature-not-required' or 'uses-implied-feature'.
        feature_matches = re.findall(
            (r'(uses-feature(?:-not-required)?|uses-implied-feature): '
             r'name=\'([\w\.]+)\'(?: reason=\'([^\']+)\')?'),
            output,
        )
        self.data['uses_features'] = {
            feature_name: {
                'type': feature_type,
                'reason': reason if reason else 'No reason provided',
            }
            for feature_type, feature_name, reason in feature_matches
        }

        return self.data

    def get_apk_files(self):
        """List all files in the APK.

        Returns:
            The lines printed by ``aapt list``, or an empty list when the
            command fails or prints nothing.
        """
        output = self._execute_command(
            [self.aapt_path, 'list', self.apk_path])
        if output:
            return output.splitlines()
        return []

    def get_apk_strings(self):
        """Extract strings from the APK.

        Returns:
            The strings parsed from ``aapt2 dump strings``, or an empty
            list when the command fails or prints nothing.
        """
        output = self._execute_command(
            [self.aapt2_path, 'dump', 'strings', self.apk_path])
        if output:
            return self._get_strings(output)
        return []

    def get_apk_features(self):
        """Extract features from the APK.

        Returns:
            ``self.data``, updated from ``aapt2 dump badging`` when the
            command succeeds and left unchanged otherwise.
        """
        output = self._execute_command(
            [self.aapt2_path, 'dump', 'badging', self.apk_path])
        if output:
            return self._parse_badging(output)
        return self.data

0 commit comments

Comments
 (0)