"""Tauri v2 resource extractor: recover brotli-compressed assets embedded in
.exe, .apk, and .so binaries built with Tauri v2."""

import argparse
import os
import struct
import sys
import zipfile
try:
    import brotli
except ImportError:
    print("Error: brotli module required. Install with: pip install brotli")
    sys.exit(1)
def brotli_decompress_stream(data, offset, max_len=None):
    """Decompress a brotli stream starting at offset; return (decompressed, consumed_bytes).

    Uses the brotli.Decompressor streaming API to find the approximate end of
    the stream, then binary-searches within the last chunk for the exact end.
    """
    if max_len is None:
        max_len = len(data) - offset

    dec = brotli.Decompressor()
    out_parts = []
    consumed = 0
    chunk_size = 4096
    pos = offset
    end = min(offset + max_len, len(data))

    while pos < end:
        chunk = data[pos : min(pos + chunk_size, end)]
        if not chunk:
            break
        try:
            result = dec.process(chunk)
            out_parts.append(result)
            consumed += len(chunk)
            pos += len(chunk)
            if dec.is_finished():
                break
        except brotli.error:
            break

    if not dec.is_finished():
        # The stream never finished cleanly; try a one-shot decompress of what
        # was consumed so far before giving up.
        try:
            result = brotli.decompress(data[offset : offset + consumed])
            return result, consumed
        except brotli.error:
            return None, 0

    decompressed = b"".join(out_parts)

    # The stream ended somewhere inside the last chunk. Binary-search for the
    # shortest prefix of that chunk that still decompresses cleanly; this
    # relies on one-shot brotli.decompress rejecting truncated input.
    base_consumed = consumed - len(chunk)
    lo, hi = 1, len(chunk)
    while lo < hi:
        mid = (lo + hi) // 2
        try:
            brotli.decompress(data[offset : offset + base_consumed + mid])
            hi = mid
        except brotli.error:
            lo = mid + 1

    exact_consumed = base_consumed + lo
    try:
        decompressed = brotli.decompress(data[offset : offset + exact_consumed])
        return decompressed, exact_consumed
    except brotli.error:
        # Fall back to the chunk-granular result if the search missed.
        return decompressed, consumed
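
# A minimal sanity check for brotli_decompress_stream (illustrative sketch, not
# part of the extractor; the payload and helper name are made up). It
# round-trips a buffer through one-shot compression and confirms the streaming
# path recovers it without over-reading.
def _check_stream_boundary():
    payload = b"tauri asset payload " * 200
    compressed = brotli.compress(payload)
    out, consumed = brotli_decompress_stream(compressed, 0)
    assert out == payload
    assert 0 < consumed <= len(compressed)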
def parse_pe(data):
    """Parse PE headers to get image_base and section info."""
    if data[:2] != b"MZ":
        raise ValueError("Not a valid PE file (missing MZ header)")

    pe_offset = struct.unpack_from("<I", data, 0x3C)[0]
    if data[pe_offset : pe_offset + 4] != b"PE\x00\x00":
        raise ValueError("Not a valid PE file (missing PE signature)")

    coff_offset = pe_offset + 4
    num_sections = struct.unpack_from("<H", data, coff_offset + 2)[0]
    optional_hdr_size = struct.unpack_from("<H", data, coff_offset + 16)[0]
    optional_offset = coff_offset + 20

    # ImageBase lives at +24 in the PE32+ optional header and at +28 in PE32.
    magic = struct.unpack_from("<H", data, optional_offset)[0]
    if magic == 0x20B:  # PE32+ (64-bit)
        image_base = struct.unpack_from("<Q", data, optional_offset + 24)[0]
    elif magic == 0x10B:  # PE32 (32-bit)
        image_base = struct.unpack_from("<I", data, optional_offset + 28)[0]
    else:
        raise ValueError(f"Unknown PE optional header magic: 0x{magic:X}")

    # IMAGE_SECTION_HEADER is 40 bytes: Name[8], then VirtualSize (+8),
    # VirtualAddress (+12), SizeOfRawData (+16), PointerToRawData (+20).
    sections_offset = optional_offset + optional_hdr_size
    sections = []
    for i in range(num_sections):
        off = sections_offset + i * 40
        name_bytes = data[off : off + 8].rstrip(b"\x00")
        name = name_bytes.decode("ascii", errors="replace")
        virtual_size = struct.unpack_from("<I", data, off + 8)[0]
        virtual_addr = struct.unpack_from("<I", data, off + 12)[0]
        raw_size = struct.unpack_from("<I", data, off + 16)[0]
        raw_offset = struct.unpack_from("<I", data, off + 20)[0]
        sections.append(
            {
                "name": name,
                "va": virtual_addr,
                "virtual_size": virtual_size,
                "raw_offset": raw_offset,
                "raw_size": raw_size,
            }
        )

    return {"image_base": image_base, "sections": sections}
def va_to_file_offset(va, image_base, sections):
    """Convert a virtual address to a file offset using section mappings."""
    rva = va - image_base
    for sec in sections:
        sec_start = sec["va"]
        sec_end = sec_start + max(sec["virtual_size"], sec["raw_size"])
        if sec_start <= rva < sec_end:
            return rva - sec_start + sec["raw_offset"]
    return None
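
# Worked example (all numbers illustrative): with image_base 0x140000000 and a
# section at VA 0x2000 / raw offset 0x1800, the virtual address 0x140003000
# has RVA 0x3000 and maps to file offset 0x3000 - 0x2000 + 0x1800 = 0x2800.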
def find_asset_headers(data, pe_info):
    """Find the AssetHeader table in the .rdata section and extract asset metadata."""
    image_base = pe_info["image_base"]
    sections = pe_info["sections"]

    # Prefer .rdata/.rodata; otherwise fall back to any other data-like section.
    rdata = None
    for sec in sections:
        if sec["name"] in (".rdata", ".rodata"):
            rdata = sec
            break
    if rdata is None:
        for sec in sections:
            if "data" in sec["name"].lower() and sec["name"] != ".data":
                rdata = sec
                break
    if rdata is None:
        return []

    rdata_file_start = rdata["raw_offset"]
    rdata_file_end = rdata_file_start + rdata["raw_size"]

    assets = []

    scan_start = rdata_file_start
    scan_end = rdata_file_end - 32

    # Entries are 8-byte aligned.
    if scan_start % 8 != 0:
        scan_start += 8 - (scan_start % 8)

    best_run = []

    pos = scan_start
    while pos <= scan_end:
        # Candidate 32-byte entry: {name_ptr, name_len, data_ptr, data_size}.
        name_ptr, name_len, data_ptr, data_size = struct.unpack_from("<QQQQ", data, pos)

        valid = True

        if name_len < 1 or name_len > 511:
            valid = False

        if valid and (data_size == 0 or data_size > len(data)):
            valid = False

        if valid:
            name_off = va_to_file_offset(name_ptr, image_base, sections)
            if name_off is None or name_off < 0 or name_off + name_len > len(data):
                valid = False
            elif data[name_off : name_off + 1] != b"/":
                # Tauri asset paths always start with "/".
                valid = False

        if valid:
            data_off = va_to_file_offset(data_ptr, image_base, sections)
            if data_off is None or data_off < 0 or data_off + data_size > len(data):
                valid = False

        if valid:
            name_bytes = data[name_off : name_off + name_len]
            try:
                name = name_bytes.decode("utf-8")
            except UnicodeDecodeError:
                valid = False

        if valid and any(c < " " for c in name):
            valid = False

        if valid:
            # Entries sit back to back, so require runs of at least two to cut
            # down on false positives from unrelated pointer pairs.
            best_run.append((name, data_off, data_size))
            pos += 32
        else:
            if len(best_run) >= 2:
                assets.extend(best_run)
            best_run = []
            pos += 8

    if len(best_run) >= 2:
        assets.extend(best_run)

    return assets
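
# Illustrative only: the 32-byte record layout the scanner above expects,
# rebuilt with struct.pack. The addresses are fabricated; in a real binary
# name_ptr points at a "/"-prefixed UTF-8 path and data_ptr at a brotli stream.
def _example_asset_header():
    name = b"/index.html"
    name_ptr, data_ptr = 0x140001000, 0x140002000  # fake virtual addresses
    return struct.pack("<QQQQ", name_ptr, len(name), data_ptr, 1234)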
def parse_elf(data):
    """Parse ELF headers to get sections and relocations."""
    if data[:4] != b"\x7fELF":
        raise ValueError("Not a valid ELF file")

    ei_class = data[4]
    ei_data = data[5]
    is_64 = ei_class == 2
    fmt = "<" if ei_data == 1 else ">"  # little- vs big-endian

    if is_64:
        e_shoff = struct.unpack_from(fmt + "Q", data, 0x28)[0]
        e_shentsize = struct.unpack_from(fmt + "H", data, 0x3A)[0]
        e_shnum = struct.unpack_from(fmt + "H", data, 0x3C)[0]
        e_shstrndx = struct.unpack_from(fmt + "H", data, 0x3E)[0]
    else:
        e_shoff = struct.unpack_from(fmt + "I", data, 0x20)[0]
        e_shentsize = struct.unpack_from(fmt + "H", data, 0x2E)[0]
        e_shnum = struct.unpack_from(fmt + "H", data, 0x30)[0]
        e_shstrndx = struct.unpack_from(fmt + "H", data, 0x32)[0]

    sections = []
    for i in range(e_shnum):
        off = e_shoff + i * e_shentsize
        if is_64:
            sh_name = struct.unpack_from(fmt + "I", data, off)[0]
            sh_type = struct.unpack_from(fmt + "I", data, off + 4)[0]
            sh_flags = struct.unpack_from(fmt + "Q", data, off + 8)[0]
            sh_addr = struct.unpack_from(fmt + "Q", data, off + 16)[0]
            sh_offset = struct.unpack_from(fmt + "Q", data, off + 24)[0]
            sh_size = struct.unpack_from(fmt + "Q", data, off + 32)[0]
            sh_entsize = struct.unpack_from(fmt + "Q", data, off + 56)[0]
        else:
            sh_name = struct.unpack_from(fmt + "I", data, off)[0]
            sh_type = struct.unpack_from(fmt + "I", data, off + 4)[0]
            sh_flags = struct.unpack_from(fmt + "I", data, off + 8)[0]
            sh_addr = struct.unpack_from(fmt + "I", data, off + 12)[0]
            sh_offset = struct.unpack_from(fmt + "I", data, off + 16)[0]
            sh_size = struct.unpack_from(fmt + "I", data, off + 20)[0]
            sh_entsize = struct.unpack_from(fmt + "I", data, off + 36)[0]
        sections.append(
            {
                "name_off": sh_name,
                "type": sh_type,
                "flags": sh_flags,
                "addr": sh_addr,
                "offset": sh_offset,
                "size": sh_size,
                "entsize": sh_entsize,
            }
        )

    # Resolve section names via the section-header string table.
    if e_shstrndx < len(sections):
        strtab = sections[e_shstrndx]
        strtab_data = data[strtab["offset"] : strtab["offset"] + strtab["size"]]
        for sec in sections:
            noff = sec["name_off"]
            end = strtab_data.find(b"\x00", noff)
            sec["name"] = strtab_data[noff:end].decode("ascii", errors="replace") if end >= 0 else ""
    else:
        for sec in sections:
            sec["name"] = ""

    # Collect RELA entries (SHT_RELA == 4).
    relocations = []
    for sec in sections:
        if sec["type"] == 4:
            ent_size = sec["entsize"] or (24 if is_64 else 12)
            num_entries = sec["size"] // ent_size
            for i in range(num_entries):
                roff = sec["offset"] + i * ent_size
                if is_64:
                    r_offset = struct.unpack_from(fmt + "Q", data, roff)[0]
                    r_info = struct.unpack_from(fmt + "Q", data, roff + 8)[0]
                    r_addend = struct.unpack_from(fmt + "q", data, roff + 16)[0]
                    r_type = r_info & 0xFFFFFFFF
                else:
                    r_offset = struct.unpack_from(fmt + "I", data, roff)[0]
                    r_info = struct.unpack_from(fmt + "I", data, roff + 4)[0]
                    r_addend = struct.unpack_from(fmt + "i", data, roff + 8)[0]
                    r_type = r_info & 0xFF
                relocations.append(
                    {"offset": r_offset, "type": r_type, "addend": r_addend}
                )

    return {"is_64": is_64, "fmt": fmt, "sections": sections, "relocations": relocations}
def elf_va_to_file_offset(va, sections):
    """Convert an ELF virtual address to a file offset."""
    for sec in sections:
        if sec["addr"] <= va < sec["addr"] + sec["size"]:
            return va - sec["addr"] + sec["offset"]
    return None
def find_elf_asset_entries(data, elf_info):
    """Find Tauri asset entries in ELF by analyzing the phf::Map structure via relocations.

    The phf::Map entries are 32 bytes each: {name_ptr(8), name_len(8),
    data_ptr(8), data_len(8)}. In the ELF file the pointer fields are zero and
    are filled in at load time by relative relocations; the addend of each
    relocation is the virtual address of the pointed-to data.
    """
    sections = elf_info["sections"]
    relocations = elf_info["relocations"]

    # R_AARCH64_RELATIVE (0x403), R_X86_64_RELATIVE (8), R_RISCV_RELATIVE (3).
    relative_relocs = [r for r in relocations if r["type"] in (0x403, 8, 3)]

    if not relative_relocs:
        return []

    # Map each relocated file offset to the VA its slot will hold at run time.
    reloc_map = {}
    for r in relative_relocs:
        file_off = elf_va_to_file_offset(r["offset"], sections)
        if file_off is not None:
            reloc_map[file_off] = r["addend"]

    candidate_sections = [
        s for s in sections if s["name"] in (".data.rel.ro", ".data.rel.ro.local")
    ]
    if not candidate_sections:
        # Fall back to any allocated, writable section (SHF_WRITE | SHF_ALLOC).
        candidate_sections = [s for s in sections if (s["flags"] & 3) == 3 and s["size"] > 0]

    assets = []

    for sec in candidate_sections:
        sec_start = sec["offset"]
        sec_end = sec_start + sec["size"]

        pos = sec_start
        while pos + 32 <= sec_end:
            # Both pointer slots of a real entry must carry relocations.
            if pos not in reloc_map or (pos + 16) not in reloc_map:
                pos += 8
                continue

            name_va = reloc_map[pos]
            name_len = struct.unpack_from("<Q", data, pos + 8)[0]
            data_va = reloc_map[pos + 16]
            data_len = struct.unpack_from("<Q", data, pos + 24)[0]

            if name_len < 1 or name_len > 512:
                pos += 8
                continue
            if data_len == 0 or data_len > len(data):
                pos += 8
                continue

            name_off = elf_va_to_file_offset(name_va, sections)
            data_off = elf_va_to_file_offset(data_va, sections)

            if name_off is None or data_off is None:
                pos += 8
                continue
            if name_off + name_len > len(data) or data_off + data_len > len(data):
                pos += 8
                continue

            if data[name_off : name_off + 1] != b"/":
                pos += 8
                continue

            try:
                name = data[name_off : name_off + name_len].decode("utf-8")
            except UnicodeDecodeError:
                pos += 8
                continue

            if any(c < " " for c in name):
                pos += 8
                continue

            basename = name.rsplit("/", 1)[-1]
            if "." not in basename:
                pos += 8
                continue

            # Final check: the data must actually be a valid brotli stream.
            try:
                brotli.decompress(data[data_off : data_off + data_len])
            except brotli.error:
                pos += 8
                continue

            assets.append((name, data_off, data_len))
            pos += 32

    return assets
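
# Illustrative only: what one phf::Map entry looks like on disk inside the .so.
# Both pointer slots are zero until the dynamic loader applies the relative
# relocations, which is why the scanner above reads pointers from reloc_map
# rather than from the file bytes themselves.
def _example_unrelocated_entry(name_len, data_len):
    return struct.pack("<QQQQ", 0, name_len, 0, data_len)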
def find_brotli_streams_near_paths(data, start=0, end=None):
    """Fallback scanner: find resource paths, then use streaming brotli
    decompression to extract the exact data that follows each path."""
    if end is None:
        end = len(data)

    known_exts = (
        b".html", b".htm", b".js", b".mjs", b".css", b".json", b".svg",
        b".png", b".jpg", b".jpeg", b".gif", b".webp", b".ico",
        b".woff", b".woff2", b".ttf", b".eot", b".wasm",
    )

    paths = []
    pos = start
    while pos < end - 2:
        idx = data.find(b"/", pos, end)
        if idx == -1:
            break

        # Extend over printable ASCII (excluding backslash), up to 512 bytes.
        path_end = idx + 1
        while path_end < end and path_end - idx < 512:
            b = data[path_end]
            if 0x21 <= b <= 0x7E and b != 0x5C:
                path_end += 1
            else:
                break

        if path_end - idx < 3:
            pos = idx + 1
            continue

        path_bytes = data[idx:path_end]
        if not any(path_bytes.lower().endswith(ext) for ext in known_exts):
            pos = idx + 1
            continue

        try:
            path_str = path_bytes.decode("utf-8")
        except UnicodeDecodeError:
            pos = idx + 1
            continue

        # Deeply nested "paths" are usually false positives.
        if path_str.count("/") > 3:
            pos = idx + 1
            continue

        paths.append((path_str, idx, path_end))
        pos = path_end

    assets = []
    for name, _, name_end in paths:
        decompressed, consumed = brotli_decompress_stream(data, name_end, end - name_end)
        if decompressed is not None and consumed > 0:
            assets.append((name, name_end, consumed))

    return assets
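
# A quick way to exercise the fallback scanner on an arbitrary blob
# (illustrative helper, not called anywhere). One caveat of the heuristic: if
# a brotli stream happens to begin with a printable-ASCII byte, the path
# matcher absorbs it into the path and the extension check then rejects the
# entry, so that asset is missed by this fallback.
def _demo_path_scan(blob):
    for name, off, size in find_brotli_streams_near_paths(blob):
        print(f"{name}: {size:,} compressed bytes at offset {off:#x}")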
def extract_assets(filepath, output_dir):
    """Main extraction function for EXE files."""
    print(f"Reading {filepath}...")
    with open(filepath, "rb") as f:
        data = f.read()
    print(f"File size: {len(data):,} bytes")

    pe_info = parse_pe(data)
    print(f"Image base: 0x{pe_info['image_base']:X}")
    print(f"Sections: {', '.join(s['name'] for s in pe_info['sections'])}")

    print("\nSearching for AssetHeader table...")
    assets = find_asset_headers(data, pe_info)

    if assets:
        print(f"Found {len(assets)} assets via AssetHeader table (fast path)")
    else:
        print("AssetHeader table not found, falling back to path scan...")
        rdata = None
        for sec in pe_info["sections"]:
            if sec["name"] in (".rdata", ".rodata"):
                rdata = sec
                break
        if rdata:
            assets = find_brotli_streams_near_paths(
                data, rdata["raw_offset"], rdata["raw_offset"] + rdata["raw_size"]
            )
        if not assets:
            print("Scanning entire file...")
            assets = find_brotli_streams_near_paths(data)

        if assets:
            print(f"Found {len(assets)} assets via path scan (fallback)")
        else:
            print("No assets found.")
            return

    os.makedirs(output_dir, exist_ok=True)
    print(f"\nExtracting to {output_dir}/")
    print("-" * 60)

    for name, data_off, data_size in assets:
        compressed = data[data_off : data_off + data_size]
        try:
            decompressed = brotli.decompress(compressed)
            status = "OK"
        except brotli.error:
            # Sizes from the fallback scan can overshoot slightly; retry with
            # up to 255 trailing bytes trimmed off.
            decompressed = None
            for trim in range(1, min(256, data_size)):
                try:
                    decompressed = brotli.decompress(compressed[: data_size - trim])
                    status = f"OK (trimmed {trim} bytes)"
                    break
                except brotli.error:
                    continue
            if decompressed is None:
                print(f"  FAIL {name} ({data_size:,} bytes) — brotli decompression failed")
                # Keep the raw compressed bytes for manual inspection.
                out_path = os.path.join(output_dir, name.lstrip("/"))
                os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
                with open(out_path + ".br", "wb") as f:
                    f.write(compressed)
                continue

        out_path = os.path.join(output_dir, name.lstrip("/"))
        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
        with open(out_path, "wb") as f:
            f.write(decompressed)

        print(f"  {status:20s} {name:30s} {data_size:>10,} -> {len(decompressed):>10,} bytes")

    print("-" * 60)
    print(f"Done. {len(assets)} assets extracted to {output_dir}/")
def handle_apk(filepath, output_dir):
    """Handle APK files: unzip, find .so files, extract assets from them."""
    print(f"Processing APK: {filepath}")

    with zipfile.ZipFile(filepath, "r") as zf:
        so_files = [
            name
            for name in zf.namelist()
            if name.endswith(".so") and ("libapp" in name or "libtauri" in name)
        ]

        if not so_files:
            so_files = [name for name in zf.namelist() if name.endswith(".so")]
            print(f"No libapp.so found, trying all .so files: {so_files}")

        if not so_files:
            print("No .so files found in APK")
            return

        for so_name in so_files:
            print(f"\nExtracting from {so_name}...")
            so_data = zf.read(so_name)
            extract_so(so_data, so_name, output_dir)
def extract_so(data, name, output_dir):
    """Extract assets from an ELF .so file."""
    if data[:4] != b"\x7fELF":
        print(f"  {name} is not a valid ELF file, skipping")
        return

    print("  Parsing ELF structure...")
    elf_info = parse_elf(data)
    sections = elf_info["sections"]
    sec_names = [s["name"] for s in sections if s["name"]]
    print(f"  Sections: {', '.join(sec_names[:15])}{'...' if len(sec_names) > 15 else ''}")
    print(f"  Relocations: {len(elf_info['relocations'])}")

    print("  Searching for phf::Map asset entries...")
    assets = find_elf_asset_entries(data, elf_info)

    if assets:
        print(f"  Found {len(assets)} assets via relocation analysis (fast path)")
    else:
        print("  Relocation analysis failed, falling back to path+brotli scan...")
        rodata = None
        for sec in sections:
            if sec["name"] == ".rodata":
                rodata = sec
                break
        if rodata:
            assets = find_brotli_streams_near_paths(
                data, rodata["offset"], rodata["offset"] + rodata["size"]
            )
        if not assets:
            assets = find_brotli_streams_near_paths(data)

        if assets:
            print(f"  Found {len(assets)} assets via path+brotli scan (fallback)")
        else:
            print(f"  No assets found in {name}")
            return

    os.makedirs(output_dir, exist_ok=True)
    print(f"  Extracting to {output_dir}/")
    print("  " + "-" * 58)

    success = 0
    for res_name, data_off, data_size in assets:
        compressed = data[data_off : data_off + data_size]
        try:
            decompressed = brotli.decompress(compressed)
            status = "OK"
        except brotli.error:
            # Retry with the streaming decompressor, which tolerates an
            # overshooting size estimate.
            decompressed, exact_size = brotli_decompress_stream(data, data_off, data_size)
            if decompressed is not None:
                status = f"OK (stream, {exact_size}B)"
            else:
                print(f"  FAIL {res_name} ({data_size:,} bytes) — decompression failed")
                continue

        out_path = os.path.join(output_dir, res_name.lstrip("/"))
        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
        with open(out_path, "wb") as f:
            f.write(decompressed)
        print(f"  {status:20s} {res_name:30s} {data_size:>10,} -> {len(decompressed):>10,} bytes")
        success += 1

    print("  " + "-" * 58)
    print(f"  Done. {success}/{len(assets)} assets extracted.")
def main():
    parser = argparse.ArgumentParser(
        description="Tauri v2 Resource Extractor — extract embedded brotli-compressed assets"
    )
    parser.add_argument("input_file", help="Input file: .exe, .apk, or .so")
    parser.add_argument(
        "-o",
        "--output",
        help="Output directory (default: <filename>_extracted)",
        default=None,
    )
    args = parser.parse_args()

    if not os.path.isfile(args.input_file):
        print(f"Error: file not found: {args.input_file}")
        sys.exit(1)

    if args.output is None:
        base = os.path.splitext(os.path.basename(args.input_file))[0]
        args.output = f"{base}_extracted"

    ext = os.path.splitext(args.input_file)[1].lower()

    if ext == ".apk":
        handle_apk(args.input_file, args.output)
    elif ext == ".so":
        with open(args.input_file, "rb") as f:
            data = f.read()
        extract_so(data, args.input_file, args.output)
    else:
        extract_assets(args.input_file, args.output)
if __name__ == "__main__":
    main()