内容纲要

Androguard

用法

获取manifest文件并分析属性

# Presumably returns the manifest serialized as UTF-8 XML (verify against the
# installed Androguard version); the result is not stored here.
self.apk.get_AndroidManifest().toxml('utf-8')
# Manifest as an XML element object, which can then be queried with xpath().
mani_attrs = self.apk.get_android_manifest_axml().get_xml_obj()

属性:

# Select every <application> element of the manifest.
mani_attrs = self.apk.get_android_manifest_axml().get_xml_obj().xpath('//application')
# Iterate the elements selected above (the original looped over an undefined
# name `attrs`).
for attr in mani_attrs:
    # android:* attributes are looked up by namespace-qualified name.
    ns = '{http://schemas.android.com/apk/res/android}'
    # NOTE(review): the variable is named after allowBackup but the attribute
    # read is android:debuggable -- confirm which one is intended.
    tag_android_allowBackup = attr.get(ns + 'debuggable')

分析apk获取dex、类、方法

# Analyze the APK; `ds` is iterated below as the collection of dex objects.
a, ds, dx = AnalyzeAPK(self.apk_path)
# Flatten the classes of every dex into one list.
_all_classes = [dex_class for dex in ds for dex_class in dex.get_classes()]
for _class in _all_classes:
    class_name = _class.get_name()
    # Skip classes whose name mentions a well-known library package.
    is_library_class = any(
        marker in class_name for marker in (b'google', b'androidx', b'support')
    )
    if not is_library_class:
        _methods = _class.get_methods()
        for _method in _methods:
            _instructions = _method.get_instructions()

获取资源文件并分析

self.res_parser = self.apk.get_android_resources()
# See the ARSCParser API methods for the available operations.
# Fetch the string-type resources, decode them as UTF-8 text and split the
# text into lines.
strings = self.res_parser.get_strings_resources().decode('utf-8')
strs = strings.split('\n')

获得apk下的所有文件(res、layout……)

# Names of the files packaged in the APK (res, layout, ...).
files = self.apk.get_files()

xml分析

# `layout_byte` is the raw content of one binary XML file from the APK.
layout_xml = AXMLPrinter(layout_byte)
layout_etree = layout_xml.get_xml_obj()
# Select every element in the document.
layout_ll = layout_etree.xpath('//*')
# Read an attribute of a node: `i` stands for one element of `layout_ll`
# and `self.ns` for the android namespace prefix.
i.get(self.ns + 'text')

案例

下面是一个已废弃的项目:其中存在 API 相关的 bug 和尚未完成的业务逻辑,但涉及的文件分析比较全面,可作参考。

import os
import re

from androguard.core.bytecodes.apk import APK
from androguard.core.bytecodes.axml import AXMLParser, AXMLPrinter
from androguard.misc import AnalyzeAPK
from androguard.core.analysis.analysis import Analysis, MethodClassAnalysis

# -*- coding:utf-8 -*-

class NormalFind3pSDK:
    """Locate the click handler behind an app's "third-party SDK list" entry.

    Pipeline (driven by :meth:`start`):

    1. Search the string resources for a characteristic phrase (for example
       one of ``charact_str_list``) and collect the matching resource names.
    2. Walk every layout, find elements whose ``android:text`` references one
       of those resources, and collect the ``android:id`` of their parent.
    3. Walk every class looking for methods that mention that id and also
       mention ``setOnClickListener``.
    4. Print the outgoing calls of each such method.
    """

    def __init__(self, apk_path):
        """Open the APK at *apk_path* and cache its resource parser."""
        self.charact_str_list = ['第三方SDK目录', '第三方信息共享清单', '信息收集清单及接入合作方目录', '第三方个人信息共享清单']
        self.apk_path = apk_path
        self.apk = APK(apk_path)
        self.ns = '{http://schemas.android.com/apk/res/android}'
        self.res_parser = self.apk.get_android_resources()
        self.package_name = self.apk.package
        # Filled lazily by _analyze(); full analysis is expensive.
        self._analysis = None

    def _analyze(self):
        """Return the ``AnalyzeAPK`` result tuple, computing it only once."""
        cached = getattr(self, '_analysis', None)
        if cached is None:
            cached = AnalyzeAPK(self.apk_path)
            self._analysis = cached
        return cached

    def _lookup_reference(self, ref):
        """Resolve an ``@<hex id>`` attribute value through the resource table.

        Returns the tuple produced by ``res_parser.get_id`` or ``None`` when
        *ref* is missing, is not a plain hexadecimal reference, or does not
        resolve to a named resource.
        """
        if not ref or not ref.startswith('@'):
            return None
        try:
            res_id = int(ref[1:], 16)
        except ValueError:
            # Not a bare hex id (e.g. a package-qualified reference).
            return None
        res_info = self.res_parser.get_id(self.package_name, res_id)
        if not res_info or res_info[1] is None:
            return None
        return res_info

    def find_resources_id(self, crct_str):
        """Return the names of string resources whose line contains *crct_str*.

        The string resources are dumped as text and scanned line by line; for
        each matching line the first double-quoted token is taken as the
        resource name.

        Returns:
            A list of names. It is empty (never ``None``) when nothing
            matched, so callers can iterate the result unconditionally.
        """
        str_ids = []
        strings = self.res_parser.get_strings_resources().decode('utf-8')
        for line in strings.split('\n'):
            if crct_str not in line:
                continue
            parts = line.split('"')
            # A matching line without a quoted token carries no name.
            if len(parts) > 1:
                str_ids.append(parts[1])
        if str_ids:
            print("[+] find_resources_id: {}".format(str_ids))
        else:
            print("[x] ==find_resources_id==: Not find target resource id!")
        return str_ids

    def find_target_layout_id(self, str_ids):
        """Return parent ``android:id`` names of elements showing *str_ids*.

        Every ``res/layout/*.xml`` file is parsed once. An element matches
        when its ``android:text`` is a resource reference whose resolved name
        contains one of *str_ids*; the ``android:id`` of the element's parent
        is then resolved to a name and collected.

        Args:
            str_ids: iterable of string-resource names; ``None`` or empty
                yields an empty result.

        Returns:
            A list of resource names, ordered by *str_ids* and then by layout
            file order.
        """
        target_layout_ids = []
        if not str_ids:
            return target_layout_ids

        # Parse each layout a single time and keep only the elements whose
        # android:text resolves to a named resource.
        candidates = []
        for layout_file in self.apk.get_files():
            if not (layout_file.startswith('res/layout/') and layout_file.endswith('.xml')):
                continue
            layout_etree = AXMLPrinter(self.apk.get_file(layout_file)).get_xml_obj()
            for element in layout_etree.xpath('//*'):
                text_info = self._lookup_reference(element.get(self.ns + 'text'))
                if text_info is not None:
                    candidates.append((layout_file, element, text_info[1]))

        for str_id in str_ids:
            for layout_file, element, text_name in candidates:
                if str_id not in text_name:
                    continue
                print(text_name, layout_file)
                parents = element.xpath('..')
                if not parents:
                    # The root element has no parent to take an id from.
                    continue
                id_info = self._lookup_reference(parents[0].get(self.ns + 'id'))
                if id_info is None:
                    # Parent has no android:id or it cannot be resolved.
                    continue
                print(id_info)
                target_layout_ids.append(id_info[1])
        return target_layout_ids

    def find_target_methods(self, target_layout_id):
        """Find methods that mention *target_layout_id* and a click listener.

        Classes whose name contains ``google``, ``androidx`` or ``support``
        are skipped. A method is selected when the textual output of its
        instructions contains both *target_layout_id* and
        ``setOnClickListener``.

        Returns:
            ``(instructions_list, methods)``: two parallel lists, the first
            holding each selected method's instructions as a list.
        """
        target_method_instructions_list = []
        target_methods = []
        str_onclick = 'setOnClickListener'
        library_markers = ('google', 'androidx', 'support')
        _, ds, _ = self._analyze()
        for d in ds:
            for _class in d.get_classes():
                class_name = _class.get_name()
                if any(marker in class_name for marker in library_markers):
                    continue
                for _method in _class.get_methods():
                    _method.load()
                    # Materialize the instructions: the returned iterable may
                    # be single-use, and it is both scanned and stored.
                    raw_instructions = list(_method.get_instructions())
                    outputs = [ins.get_output() for ins in raw_instructions]
                    flag_tli = any(target_layout_id in out for out in outputs)
                    flag_onclick = any(str_onclick in out for out in outputs)
                    if flag_onclick and flag_tli:
                        target_method_instructions_list.append(raw_instructions)
                        target_methods.append(_method)
                        print(_method.get_name())
        return target_method_instructions_list, target_methods

    def start(self, crct_str):
        """Run the whole pipeline for *crct_str* and print the calls found.

        Stops early, without running the expensive code analysis, when no
        matching resource or layout id is found.
        """
        str_ids = self.find_resources_id(crct_str)
        print(str_ids)
        target_layout_ids = self.find_target_layout_id(str_ids)
        if not target_layout_ids:
            return
        _, _, dx = self._analyze()
        for target_layout_id in target_layout_ids:
            _, target_methods = self.find_target_methods(target_layout_id)
            for target_method in target_methods:
                print(type(target_method), target_method)
                # Cross references are taken from the analysis object
                # returned by AnalyzeAPK rather than from a freshly
                # constructed per-method wrapper.
                method_analysis = dx.get_method_analysis(target_method)
                if method_analysis is None:
                    continue
                for _, call, _ in method_analysis.get_xref_to():
                    print("  calling -> {} -- {}".format(call.class_name, call.name))

    def test(self):
        """Debug helper: print ``get_id_resources`` for the app package.

        Note from the original author: the get_id_resources API misbehaves,
        every entry comes back as "ALT false".
        """
        print(self.res_parser.get_id_resources(self.package_name))

    def test2(self):
        """Debug helper: print the directory listing of ``qmxs.apk``."""
        # NOTE(review): os.listdir raises NotADirectoryError if 'qmxs.apk' is
        # a regular file -- confirm this path is meant to be a directory.
        f = os.listdir('qmxs.apk')
        print(f)

一些使用时的报错

get_resolved_res_configs(rid, config=None)

>>> a.get_android_resources().get_resolved_res_configs("string/facebook_app_id")
ValueError: 'rid' must be an int
# rid 必须是 int 类型的资源 ID,不能传入 "string/facebook_app_id" 这样的资源名字符串