{"id":967,"date":"2026-06-12T13:02:08","date_gmt":"2026-06-12T04:02:08","guid":{"rendered":"http:\/\/kyunam.com\/?p=967"},"modified":"2026-06-12T13:02:08","modified_gmt":"2026-06-12T04:02:08","slug":"%ec%96%b4%eb%96%a4-%ec%84%a4%ec%a0%95%ec%9c%bc%eb%a1%9c-vllm%ec%9d%84-%eb%b9%a0%eb%a5%b4%ea%b2%8c-%ed%95%a0-%ea%b2%83%ec%9d%b8%ea%b0%80","status":"publish","type":"post","link":"http:\/\/kyunam.com\/?p=967","title":{"rendered":"\uc5b4\ub5a4 \uc124\uc815\uc73c\ub85c vLLM\uc744 \ube60\ub974\uac8c \ud560 \uac83\uc778\uac00?"},"content":{"rendered":"\n<!DOCTYPE html>\n<html lang=\"ko\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>\uadf8 \uc124\uc815, \ub2e4\ub978 \ubaa8\ub378\uc5d0\uc11c\ub3c4 \ud1b5\ud55c\ub2e4. \uac70\uc758. \u2014 vLLM Performance \u00b7 Engineering Notes<\/title>\n<link rel=\"preconnect\" href=\"https:\/\/fonts.googleapis.com\">\n<link rel=\"preconnect\" href=\"https:\/\/fonts.gstatic.com\" crossorigin>\n<link href=\"https:\/\/fonts.googleapis.com\/css2?family=IBM+Plex+Sans+KR:wght@300;400;500;700&#038;family=IBM+Plex+Mono:wght@400;500&#038;display=swap\" rel=\"stylesheet\">\n<style>\n:root{\n  --ink:#1c1f23;\n  --paper:#fdfdfb;\n  --muted:#6e747d;\n  --rule:#e6e4de;\n  --soft:#f4f3ef;\n  \/* \uce21\uc815\uc810 6\uc0c9 \u2014 \uc774 \uae00\uc758 \uc2dc\uac01 \uc5b8\uc5b4 *\/\n  --p1:#a3a8ae;  \/* \u2460 van(OFF)    *\/\n  --p2:#5d6b7c;  \/* \u2461 van(ON)     *\/\n  --p3:#2f6bb0;  \/* \u2462 DSA(ON)     *\/\n  --p4:#1f8a5f;  \/* \u2463 suf(OFF) \u2014 winner, \ubcf8\ubb38 accent \uacb8\uc6a9 *\/\n  --p5:#33a578;  \/* \u2464 suf(ON)     *\/\n  --p6:#7a5cb5;  \/* \u2465 suf+dsa(ON) *\/\n  --neg:#c0533f;\n}\n*{box-sizing:border-box;margin:0;padding:0}\nhtml{scroll-behavior:smooth}\n@media (prefers-reduced-motion:reduce){html{scroll-behavior:auto}}\nbody{\n  font-family:'IBM Plex Sans KR',-apple-system,'Noto Sans KR',sans-serif;\n  background:var(--paper);color:var(--ink);\n  
line-height:1.78;font-size:16.5px;\n  font-feature-settings:'tnum';\n}\n.wrap{max-width:760px;margin:0 auto;padding:64px 22px 80px}\n\/* \u2500\u2500 header \u2500\u2500 *\/\n.eyebrow{font-family:'IBM Plex Mono',monospace;font-size:12.5px;letter-spacing:.14em;color:var(--p4);text-transform:uppercase;margin-bottom:18px}\nh1{font-size:clamp(28px,5vw,40px);line-height:1.3;font-weight:700;letter-spacing:-.02em;margin-bottom:22px}\n.deck{font-size:17.5px;color:#3c4148;font-weight:300;line-height:1.85;border-left:3px solid var(--p4);padding-left:18px;margin-bottom:34px}\n.hr-dot{border:none;border-top:1px dashed var(--rule);margin:34px 0}\n\/* \u2500\u2500 \ubcf8\ubb38 lead \u2500\u2500 *\/\n.lead{margin-bottom:8px}\nstrong{font-weight:700}\nem{font-style:normal;background:linear-gradient(transparent 62%,rgba(31,138,95,.18) 62%)}\n\/* \u2500\u2500 section \u2500\u2500 *\/\nsection{margin-top:64px}\n.sec-no{font-family:'IBM Plex Mono',monospace;font-size:13px;color:var(--p4);letter-spacing:.1em}\nh2{font-size:24px;font-weight:700;letter-spacing:-.01em;margin:2px 0 4px}\n.tagline{font-size:14.5px;color:var(--muted);margin-bottom:20px}\nh3{font-size:17.5px;font-weight:700;margin:30px 0 10px}\np{margin-bottom:16px}\nul,ol{margin:0 0 16px 22px}\nli{margin-bottom:10px}\n\/* \u2500\u2500 \uce21\uc815\uc810 chip \u2500\u2500 *\/\n.pt{display:inline-block;font-family:'IBM Plex Mono',monospace;font-size:12px;font-weight:500;\n  padding:1px 8px;border-radius:3px;color:#fff;white-space:nowrap;vertical-align:1px}\n.pt1{background:var(--p1)} .pt2{background:var(--p2)} .pt3{background:var(--p3)}\n.pt4{background:var(--p4)} .pt5{background:var(--p5)} .pt6{background:var(--p6)}\n\/* \u2500\u2500 table \u2500\u2500 *\/\n.tbl-scroll{overflow-x:auto;margin:20px 0 8px}\ntable{border-collapse:collapse;width:82%;font-size:13.5px;line-height:1.5}\ncaption{caption-side:bottom;text-align:left;font-size:12.5px;color:var(--muted);padding-top:8px}\nth,td{padding:7px 9px;border-bottom:1px solid 
var(--rule);text-align:left;vertical-align:top}\nthead th{font-size:12px;color:var(--muted);font-weight:500;border-bottom:1.5px solid #cfcdc6;white-space:nowrap}\ntd.num,th.num{text-align:right;font-family:'IBM Plex Mono',monospace;font-size:12.8px;white-space:nowrap}\ntd.win{font-weight:700;background:rgba(31,138,95,.07)}\ntd.neg{color:var(--neg)}\ntd.mono,th.mono{font-family:'IBM Plex Mono',monospace;font-size:12.5px}\ntbody tr:hover{background:var(--soft)}\n\/* \u2500\u2500 figures (PNG) \u2500\u2500 *\/\n.fig-img{margin:26px 0 8px}\n.fig-img img{display:block;width:100%;height:auto;border:1px solid #e7dfd0;border-radius:6px}\n.fig-cap{font-size:12.5px;color:var(--muted);margin-top:10px;line-height:1.6}\n\/* \u2500\u2500 code \u2500\u2500 *\/\npre{background:#22262b;color:#e7eaee;font-family:'IBM Plex Mono',monospace;font-size:13px;\n  line-height:1.7;padding:18px 20px;border-radius:6px;overflow-x:auto;margin:18px 0}\ncode{font-family:'IBM Plex Mono',monospace;font-size:.88em;background:var(--soft);padding:1px 5px;border-radius:3px}\npre code{background:none;padding:0;font-size:inherit}\n.cm{color:#8b949e}\n\/* \u2500\u2500 callout \u2500\u2500 *\/\n.note{border-left:3px solid var(--p3);background:#f4f7fb;padding:14px 18px;border-radius:0 6px 6px 0;\n  font-size:14.5px;margin:20px 0;color:#33404f}\n.note b{color:#23476e}\n\/* \u2500\u2500 \uc815\ub9ac\/closing \u2500\u2500 *\/\n.closing{margin-top:26px;padding:22px 24px;background:var(--soft);border-radius:6px;font-size:15.5px}\n.closing .k{font-family:'IBM Plex Mono',monospace;font-size:12px;color:var(--p4);letter-spacing:.12em;display:block;margin-bottom:8px}\n.env{margin-top:46px;padding-top:18px;border-top:1px solid var(--rule);\n  font-size:13px;color:var(--muted);line-height:1.8}\n\/* \u2500\u2500 full table \u2500\u2500 *\/\n.tbl-scroll.wide{width:min(96vw,1200px);position:relative;left:50%;transform:translateX(-50%);margin-left:0}\n.tbl-scroll.wide table{font-size:14px}\n.tbl-scroll.wide 
th,.tbl-scroll.wide td{padding:8px 12px}\n.tbl-scroll.wide td.num,.tbl-scroll.wide th.num{font-size:13.5px}\n.tbl-scroll.wide td.mname{font-size:13px}\ntd.mname{font-family:'IBM Plex Mono',monospace;font-size:12px}\n@media(max-width:560px){\n  .bar-label{flex-basis:108px;font-size:10.5px}\n  .wrap{padding-top:44px}\n}\n<\/style>\n<\/head>\n<body>\n<div class=\"wrap\">\n\n<header>\n  <div class=\"eyebrow\">vLLM Performance \u00b7 Engineering Notes \u2014 2\ud3b8<\/div>\n  <h1>\uadf8 \uc124\uc815, \ub2e4\ub978 \ubaa8\ub378\uc5d0\uc11c\ub3c4 \ud1b5\ud55c\ub2e4. \uac70\uc758.<\/h1>\n  <p class=\"deck\">\n    <a href=\"http:\/\/kyunam.com\/?p=940\" style=\"color:inherit\">\uc804\ud3b8<\/a>\uc5d0\uc11c \uc815\ub9ac\ud55c suffix \uad6c\uc131\uc744\n    <strong>10\uac1c \ubaa8\ub378 \u00d7 7\uac1c \uc2e4\uce21 corpus \u00d7 6\uac1c \uce21\uc815\uc810<\/strong> \u2014 B200\u00d78 \uc704 <strong>406\uc140<\/strong>(420\uc140 \uc911 96.7%)\ub85c \uc7ac\uac80\uc99d\ud55c \uae30\ub85d.\n    \uadf8\ub9ac\uace0 vLLM \uae30\ubcf8 \uc124\uc815\uc758 \ucc98\ub9ac\ub7c9\uc744 +36% \ud754\ub4e0 \uc0c8 \ubcc0\uc218 \ud558\ub098, <strong>\ud638\uc2a4\ud2b8 DSA<\/strong>.\n  <\/p>\n  <hr class=\"hr-dot\">\n<\/header>\n\n<p class=\"lead\">\n\uc804\ud3b8\uc758 \uacb0\ub860\uc740 \ub2e8\uc21c\ud588\ub2e4. \ucf54\ub4dc \ud55c \uc904 \uace0\uce58\uc9c0 \uc54a\uace0 vLLM \ub0b4\uc7a5 \uc124\uc815 \u2014 <code>speculative_config<\/code>\uc758 suffix decoding,\n\uc801\uc808\ud55c cudagraph mode \u2014 \ub9cc\uc73c\ub85c Llama-3.3-70B\ub97c \ubaa8\ub4e0 \uc6cc\ud06c\ub85c\ub4dc\uc5d0\uc11c \uac00\uc18d\ud560 \uc218 \uc788\ub2e4\ub294 \uac83.\n\ud558\uc9c0\ub9cc \uadf8\uac74 <em>\ubaa8\ub378 \ud558\ub098, \ud558\ub4dc\uc6e8\uc5b4 \ud558\ub098<\/em>\uc5d0\uc11c\uc758 \uacb0\ub860\uc774\uc5c8\ub2e4. 
\uc774\ubc88\uc5d0\ub294 \uc9c8\ubb38\uc744 \ub4a4\uc9d1\uc5c8\ub2e4.\n\uac19\uc740 \ucc98\ubc29\uc774 7B\ubd80\ud130 671B MoE\uae4c\uc9c0, 10\uac1c \ubaa8\ub378\uc5d0\uc11c\ub3c4 \uc0b4\uc544\ub0a8\ub294\uac00?\nB200\u00d78\uc5d0\uc11c 10\uac1c \ubaa8\ub378 \u00d7 7\uac1c corpus \u00d7 6\uac1c \uce21\uc815\uc810, 406\uc140\uc744 \ucc44\uc6cc \ub2f5\ud588\ub2e4.\n\uacb0\uacfc\ub97c \uba3c\uc800 \ub9d0\ud558\uba74 \u2014 <strong>suffix\ub294 70\uc140 \uc911 36\uc140\uc5d0\uc11c \uc774\uacbc\uace0, \ub450 \uac1c\uc758 \ubd84\uba85\ud55c \uc608\uc678\ub97c \ub9cc\ub0ac\ub2e4.<\/strong>\n\uadf8\ub9ac\uace0 \uce21\uc815 \ub3c4\uc911, \uc124\uc815\uacfc \ubb34\uad00\ud558\uac8c vLLM \uae30\ubcf8 \uc124\uc815\uc758 throughput\uc744 +33~36% \ud754\ub4dc\ub294 \ud638\uc2a4\ud2b8 \ubcc0\uc218\ub97c \ud558\ub098 \ubc1c\uacac\ud588\ub2e4.\n<\/p>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 01 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s01\">\n  <div class=\"sec-no\">01<\/div>\n  <h2>\uc804\uccb4 \uacb0\uacfc\ubd80\ud130<\/h2>\n  <p class=\"tagline\">10\uac1c \ubaa8\ub378 \u00d7 7\uac1c corpus \u00d7 6\uac1c \uce21\uc815\uc810, 70\uc140 \uc804\ubd80.<\/p>\n\n  <p>\uac19\uc740 GPU, \uac19\uc740 harness, \uac19\uc740 real-trace prompt\ub2e4. 
\uba3c\uc800 \uc6b4\uc601 \ubd84\ud3ec proxy\uc778 mix corpus\uc5d0\uc11c\n  vLLM \uae30\ubcf8 \uc124\uc815 \uadf8\ub300\ub85c\uc778 <span class=\"pt pt1\">van(OFF)<\/span>\uc640 suffix decoding\uc744 \ucf20 <span class=\"pt pt4\">suf(OFF)<\/span>\ub97c \ube44\uad50\ud558\uba74 \uc774\ub807\ub2e4.\n  \ub77c\ubca8 \uad04\ud638 \uc548\uc758 <strong>OFF\/ON\uc740 vLLM \uc124\uc815\uc774 \uc544\ub2c8\ub77c \ud638\uc2a4\ud2b8 DSA\uc758 \uc0c1\ud0dc<\/strong>\ub2e4 \u2014 \uc5ec\uae30\uc11c\ub294 \ub458 \ub2e4 \ud638\uc2a4\ud2b8 DSA OFF\ub85c \ub9de\ucd98 \ub3d9\uc77c \uc870\uac74 \ube44\uad50\uc774\uace0, \uc790\uc138\ud55c \uc815\uc758\ub294 \u00a702\uc5d0 \uc788\ub2e4.<\/p>\n\n  <figure class=\"fig-img\"><img decoding=\"async\" src=\"http:\/\/kyunam.com\/wp-content\/uploads\/2026\/06\/vllm2-figure-01.png\" alt=\"mix corpus\uc5d0\uc11c 10\uac1c \ubaa8\ub378\uc758 vLLM \uae30\ubcf8 \uc124\uc815 \ub300\ube44 suffix decoding throughput \ube44\uad50\" loading=\"lazy\"><figcaption class=\"fig-cap\">dense 9\uac1c \ubaa8\ub378\uc740 7B\ubd80\ud130 405B\uae4c\uc9c0 \uc804\ubd80 net-positive(+83~+232%). \ub2e8 \ud558\ub098\uc758 \uc608\uc678 \u2014 671B MoE\ub294 suffix\uac00 \ud68c\uadc0\ud55c\ub2e4. \ubcf8 \uc2e4\ud5d8 + TSK_042<\/figcaption><\/figure>\n\n  <p>\uadf8\ub9ac\uace0 \uc544\ub798\uac00 70\uc140 \uc804\uccb4\ub2e4. \uac01 \uce21\uc815 \uc870\uac74\uc758 \uc790\uc138\ud55c \uc815\uc758\ub294 \u00a702\uc5d0 \uc788\uace0 \u2014 \uc9e7\uac8c\ub294 \ud638\uc2a4\ud2b8 DSA OFF\/ON \u00d7\n  vLLM \uae30\ubcf8 \uc124\uc815(van) \/ suffix(suf) \/ vllm-DSA-env(dsa) \uc870\ud569\uc774\ub2e4 \u2014 \uad75\uc740 \uc140\uc774 \uac01 \ud589\uc758 winner\ub2e4. 
\ub2e8\uc704 tok\/s.<\/p>\n\n<div class=\"tbl-scroll wide\"><table>\n      <thead><tr>\n        <th>model<\/th><th>corpus<\/th>\n        <th class=\"num\">van(OFF)<\/th><th class=\"num\">van(ON)<\/th><th class=\"num\">DSA(ON)<\/th>\n        <th class=\"num\">suf(OFF)<\/th><th class=\"num\">suf(ON)<\/th><th class=\"num\">suf+dsa(ON)<\/th><th>winner<\/th>\n      <\/tr><\/thead>\n      <tbody>\n        <tr><td class=\"mname\" rowspan=\"7\">Qwen2.5-7B-Instruct<\/td><td>sharegpt<\/td><td class=\"num\">4,189<\/td><td class=\"num\">5,600<\/td><td class=\"num\">5,640<\/td><td class=\"num win\">6,167<\/td><td class=\"num\">6,058<\/td><td class=\"num\">6,164<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">4,120<\/td><td class=\"num\">5,871<\/td><td class=\"num win\">5,973<\/td><td class=\"num\">5,416<\/td><td class=\"num\">5,322<\/td><td class=\"num\">5,551<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">3,754<\/td><td class=\"num\">5,331<\/td><td class=\"num win\">5,336<\/td><td class=\"num\">5,213<\/td><td class=\"num\">4,863<\/td><td class=\"num\">4,989<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">3,814<\/td><td class=\"num\">5,931<\/td><td class=\"num win\">5,965<\/td><td class=\"num\">5,506<\/td><td class=\"num\">5,346<\/td><td class=\"num\">5,390<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">4,184<\/td><td class=\"num\">5,694<\/td><td class=\"num\">5,644<\/td><td class=\"num\">6,285<\/td><td class=\"num\">5,974<\/td><td class=\"num win\">6,293<\/td><td><span class=\"pt pt6\">suf+dsa(ON)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">4,090<\/td><td class=\"num\">5,409<\/td><td class=\"num\">5,427<\/td><td class=\"num\">5,956<\/td><td class=\"num\">5,906<\/td><td class=\"num win\">6,038<\/td><td><span 
class=\"pt pt6\">suf+dsa(ON)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">4,169<\/td><td class=\"num\">5,564<\/td><td class=\"num\">5,572<\/td><td class=\"num win\">7,803<\/td><td class=\"num\">7,478<\/td><td class=\"num\">7,457<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">DS-R1-Distill-Qwen-7B<\/td><td>sharegpt<\/td><td class=\"num\">8,724<\/td><td class=\"num win\">12,232<\/td><td class=\"num\">12,170<\/td><td class=\"num\">11,961<\/td><td class=\"num\">11,234<\/td><td class=\"num\">11,240<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">8,835<\/td><td class=\"num\">11,891<\/td><td class=\"num\">11,888<\/td><td class=\"num win\">15,422<\/td><td class=\"num\">14,671<\/td><td class=\"num\">14,682<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">8,159<\/td><td class=\"num\">11,273<\/td><td class=\"num\">11,240<\/td><td class=\"num win\">11,459<\/td><td class=\"num\">11,035<\/td><td class=\"num\">10,519<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">8,440<\/td><td class=\"num\">11,694<\/td><td class=\"num\">11,676<\/td><td class=\"num win\">12,398<\/td><td class=\"num\">11,481<\/td><td class=\"num\">12,260<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">8,925<\/td><td class=\"num win\">12,319<\/td><td class=\"num\">12,210<\/td><td class=\"num\">11,717<\/td><td class=\"num\">10,795<\/td><td class=\"num\">11,263<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">8,811<\/td><td class=\"num\">12,055<\/td><td class=\"num win\">12,057<\/td><td class=\"num\">11,360<\/td><td class=\"num\">11,052<\/td><td class=\"num\">11,390<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        
<tr><td>mix<\/td><td class=\"num\">9,058<\/td><td class=\"num\">12,277<\/td><td class=\"num\">12,301<\/td><td class=\"num win\">24,458<\/td><td class=\"num\">22,467<\/td><td class=\"num\">22,193<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">Llama-3.1-8B-Instruct<\/td><td>sharegpt<\/td><td class=\"num\">8,868<\/td><td class=\"num\">12,091<\/td><td class=\"num\">12,088<\/td><td class=\"num\">19,054<\/td><td class=\"num\">18,073<\/td><td class=\"num win\">19,328<\/td><td><span class=\"pt pt6\">suf+dsa(ON)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">8,348<\/td><td class=\"num\">11,970<\/td><td class=\"num\">11,518<\/td><td class=\"num win\">21,353<\/td><td class=\"num\">20,735<\/td><td class=\"num\">20,518<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">9,048<\/td><td class=\"num\">10,967<\/td><td class=\"num\">11,061<\/td><td class=\"num\">15,126<\/td><td class=\"num\">14,794<\/td><td class=\"num win\">15,601<\/td><td><span class=\"pt pt6\">suf+dsa(ON)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">8,730<\/td><td class=\"num\">12,190<\/td><td class=\"num\">12,066<\/td><td class=\"num\">17,825<\/td><td class=\"num win\">17,976<\/td><td class=\"num\">17,360<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">9,002<\/td><td class=\"num\">12,210<\/td><td class=\"num\">12,197<\/td><td class=\"num win\">19,856<\/td><td class=\"num\">19,602<\/td><td class=\"num\">19,451<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">9,074<\/td><td class=\"num\">12,528<\/td><td class=\"num\">11,993<\/td><td class=\"num win\">19,862<\/td><td class=\"num\">19,361<\/td><td class=\"num\">18,905<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">8,850<\/td><td 
class=\"num\">12,089<\/td><td class=\"num\">12,058<\/td><td class=\"num win\">27,851<\/td><td class=\"num\">24,407<\/td><td class=\"num\">26,615<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">Qwen2.5-32B-Instruct<\/td><td>sharegpt<\/td><td class=\"num\">3,079<\/td><td class=\"num\">4,591<\/td><td class=\"num\">4,607<\/td><td class=\"num win\">4,662<\/td><td class=\"num\">4,499<\/td><td class=\"num\">4,474<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">2,892<\/td><td class=\"num\">4,148<\/td><td class=\"num\">4,244<\/td><td class=\"num win\">5,002<\/td><td class=\"num\">4,348<\/td><td class=\"num\">4,566<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">2,571<\/td><td class=\"num\">3,602<\/td><td class=\"num\">3,527<\/td><td class=\"num win\">4,859<\/td><td class=\"num\">4,325<\/td><td class=\"num\">4,269<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">2,915<\/td><td class=\"num\">4,295<\/td><td class=\"num\">4,425<\/td><td class=\"num win\">5,138<\/td><td class=\"num\">4,826<\/td><td class=\"num\">4,817<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">3,128<\/td><td class=\"num\">4,804<\/td><td class=\"num\">4,738<\/td><td class=\"num win\">4,884<\/td><td class=\"num\">4,651<\/td><td class=\"num\">4,504<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">3,053<\/td><td class=\"num win\">4,686<\/td><td class=\"num\">4,628<\/td><td class=\"num\">4,478<\/td><td class=\"num\">4,578<\/td><td class=\"num\">4,249<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">3,056<\/td><td class=\"num\">4,694<\/td><td class=\"num\">4,698<\/td><td class=\"num win\">6,597<\/td><td 
class=\"num\">5,979<\/td><td class=\"num\">6,256<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">DS-R1-Distill-Qwen-32B<\/td><td>sharegpt<\/td><td class=\"num\">4,803<\/td><td class=\"num\">4,931<\/td><td class=\"num\">4,902<\/td><td class=\"num win\">4,996<\/td><td class=\"num\">4,682<\/td><td class=\"num\">4,613<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">4,409<\/td><td class=\"num\">4,524<\/td><td class=\"num\">4,561<\/td><td class=\"num\">5,241<\/td><td class=\"num win\">5,589<\/td><td class=\"num\">5,444<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">3,462<\/td><td class=\"num\">3,729<\/td><td class=\"num win\">4,208<\/td><td class=\"num\">3,771<\/td><td class=\"num\">3,935<\/td><td class=\"num\">3,435<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">4,690<\/td><td class=\"num\">4,905<\/td><td class=\"num\">4,806<\/td><td class=\"num win\">5,690<\/td><td class=\"num\">5,097<\/td><td class=\"num\">5,221<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">4,891<\/td><td class=\"num\">5,066<\/td><td class=\"num\">5,102<\/td><td class=\"num win\">5,729<\/td><td class=\"num\">5,363<\/td><td class=\"num\">5,539<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">4,898<\/td><td class=\"num\">4,993<\/td><td class=\"num\">5,011<\/td><td class=\"num win\">5,356<\/td><td class=\"num\">4,980<\/td><td class=\"num\">5,116<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">4,938<\/td><td class=\"num\">5,134<\/td><td class=\"num\">5,060<\/td><td class=\"num\">9,056<\/td><td class=\"num\">8,378<\/td><td class=\"num win\">9,240<\/td><td><span class=\"pt 
pt6\">suf+dsa(ON)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">Qwen2.5-72B-Instruct<\/td><td>sharegpt<\/td><td class=\"num\">2,688<\/td><td class=\"num\">2,830<\/td><td class=\"num\">2,906<\/td><td class=\"num win\">3,219<\/td><td class=\"num\">3,095<\/td><td class=\"num\">3,006<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">2,361<\/td><td class=\"num\">2,474<\/td><td class=\"num\">2,444<\/td><td class=\"num\">2,647<\/td><td class=\"num win\">2,743<\/td><td class=\"num\">2,635<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">806<\/td><td class=\"num\">1,989<\/td><td class=\"num win\">2,542<\/td><td class=\"num\">2,489<\/td><td class=\"num\">2,358<\/td><td class=\"num\">2,022<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">3,395<\/td><td class=\"num\">3,417<\/td><td class=\"num win\">3,441<\/td><td class=\"num\">3,234<\/td><td class=\"num\">2,976<\/td><td class=\"num\">2,910<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">2,803<\/td><td class=\"num win\">2,929<\/td><td class=\"num\">2,909<\/td><td class=\"num\">2,621<\/td><td class=\"num\">2,434<\/td><td class=\"num\">2,591<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">2,807<\/td><td class=\"num\">3,169<\/td><td class=\"num\">3,083<\/td><td class=\"num win\">3,429<\/td><td class=\"num\">2,978<\/td><td class=\"num\">3,153<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">2,735<\/td><td class=\"num\">2,967<\/td><td class=\"num\">2,902<\/td><td class=\"num\">5,268<\/td><td class=\"num win\">5,643<\/td><td class=\"num\">5,266<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" 
rowspan=\"7\">Llama-3.1-70B-Instruct<\/td><td>sharegpt<\/td><td class=\"num\">3,091<\/td><td class=\"num\">3,177<\/td><td class=\"num\">3,139<\/td><td class=\"num win\">4,864<\/td><td class=\"num\">4,542<\/td><td class=\"num\">4,634<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">2,878<\/td><td class=\"num\">2,809<\/td><td class=\"num\">2,968<\/td><td class=\"num win\">6,026<\/td><td class=\"num\">5,949<\/td><td class=\"num\">5,455<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">3,391<\/td><td class=\"num\">3,456<\/td><td class=\"num\">2,899<\/td><td class=\"num win\">4,728<\/td><td class=\"num\">4,598<\/td><td class=\"num\">4,549<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">1,773<\/td><td class=\"num\">1,699<\/td><td class=\"num\">1,778<\/td><td class=\"num win\">3,266<\/td><td class=\"num\">3,243<\/td><td class=\"num\">2,273<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">3,172<\/td><td class=\"num\">3,213<\/td><td class=\"num\">3,268<\/td><td class=\"num win\">5,261<\/td><td class=\"num\">5,142<\/td><td class=\"num\">4,966<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">3,040<\/td><td class=\"num\">3,145<\/td><td class=\"num\">3,123<\/td><td class=\"num win\">3,958<\/td><td class=\"num\">3,677<\/td><td class=\"num\">3,818<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">3,129<\/td><td class=\"num\">3,206<\/td><td class=\"num\">3,192<\/td><td class=\"num win\">10,400<\/td><td class=\"num\">10,247<\/td><td class=\"num\">8,829<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">DS-R1-Distill-Llama-70B<\/td><td>sharegpt<\/td><td 
class=\"num\">3,033<\/td><td class=\"num\">3,018<\/td><td class=\"num win\">3,142<\/td><td class=\"num\">2,660<\/td><td class=\"num\">2,579<\/td><td class=\"num\">2,503<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num win\">3,236<\/td><td class=\"num\">3,142<\/td><td class=\"num\">3,182<\/td><td class=\"num\">2,739<\/td><td class=\"num\">2,739<\/td><td class=\"num\">2,642<\/td><td><span class=\"pt pt1\">van(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num win\">2,852<\/td><td class=\"num\">2,828<\/td><td class=\"num\">2,812<\/td><td class=\"num\">2,788<\/td><td class=\"num\">2,809<\/td><td class=\"num\">2,718<\/td><td><span class=\"pt pt1\">van(OFF)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">2,777<\/td><td class=\"num win\">2,989<\/td><td class=\"num\">2,954<\/td><td class=\"num\">2,426<\/td><td class=\"num\">2,328<\/td><td class=\"num\">2,265<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">3,127<\/td><td class=\"num win\">3,208<\/td><td class=\"num\">3,166<\/td><td class=\"num\">2,658<\/td><td class=\"num\">2,544<\/td><td class=\"num\">2,661<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">2,992<\/td><td class=\"num win\">3,046<\/td><td class=\"num\">3,045<\/td><td class=\"num\">2,848<\/td><td class=\"num\">2,756<\/td><td class=\"num\">2,844<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">3,164<\/td><td class=\"num\">3,244<\/td><td class=\"num\">3,198<\/td><td class=\"num\">6,127<\/td><td class=\"num win\">6,175<\/td><td class=\"num\">5,818<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">Llama-3.1-405B-FP8<\/td><td>sharegpt<\/td><td class=\"num\">1,217<\/td><td class=\"num\">1,239<\/td><td class=\"num\">1,229<\/td><td class=\"num 
win\">2,061<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">1,204<\/td><td class=\"num\">1,211<\/td><td class=\"num\">1,239<\/td><td class=\"num win\">2,639<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">1,253<\/td><td class=\"num\">1,237<\/td><td class=\"num\">1,192<\/td><td class=\"num win\">2,112<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">916<\/td><td class=\"num\">883<\/td><td class=\"num\">815<\/td><td class=\"num win\">1,725<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">1,280<\/td><td class=\"num\">1,267<\/td><td class=\"num\">1,263<\/td><td class=\"num win\">2,290<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">1,220<\/td><td class=\"num\">1,247<\/td><td class=\"num\">1,221<\/td><td class=\"num win\">2,243<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">1,252<\/td><td class=\"num\">1,252<\/td><td class=\"num\">1,271<\/td><td class=\"num win\">2,829<\/td><td class=\"num\">\u2014<\/td><td class=\"num\">\u2014<\/td><td><span class=\"pt pt4\">suf(OFF)<\/span><\/td><\/tr>\n        <tr><td class=\"mname\" rowspan=\"7\">DeepSeek-R1 (671B MoE)<\/td><td>sharegpt<\/td><td class=\"num\">1,475<\/td><td class=\"num win\">1,565<\/td><td class=\"num\">1,559<\/td><td class=\"num\">797<\/td><td class=\"num\">730<\/td><td class=\"num\">794<\/td><td><span 
class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>swebench<\/td><td class=\"num\">1,474<\/td><td class=\"num win\">1,536<\/td><td class=\"num\">1,518<\/td><td class=\"num\">538<\/td><td class=\"num\">496<\/td><td class=\"num\">542<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>humaneval<\/td><td class=\"num\">1,004<\/td><td class=\"num\">961<\/td><td class=\"num\">858<\/td><td class=\"num\">606<\/td><td class=\"num win\">1,219<\/td><td class=\"num\">670<\/td><td><span class=\"pt pt5\">suf(ON)<\/span><\/td><\/tr>\n        <tr><td>mbpp<\/td><td class=\"num\">1,437<\/td><td class=\"num\">1,482<\/td><td class=\"num win\">1,490<\/td><td class=\"num\">677<\/td><td class=\"num\">661<\/td><td class=\"num\">669<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>wildchat<\/td><td class=\"num\">1,556<\/td><td class=\"num win\">1,614<\/td><td class=\"num\">1,614<\/td><td class=\"num\">858<\/td><td class=\"num\">880<\/td><td class=\"num\">824<\/td><td><span class=\"pt pt2\">van(ON)<\/span><\/td><\/tr>\n        <tr><td>lmsys<\/td><td class=\"num\">1,533<\/td><td class=\"num\">1,587<\/td><td class=\"num win\">1,592<\/td><td class=\"num\">811<\/td><td class=\"num\">808<\/td><td class=\"num\">773<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n        <tr><td>mix<\/td><td class=\"num\">1,538<\/td><td class=\"num\">1,599<\/td><td class=\"num win\">1,601<\/td><td class=\"num\">781<\/td><td class=\"num\">754<\/td><td class=\"num\">727<\/td><td><span class=\"pt pt3\">DSA(ON)<\/span><\/td><\/tr>\n      <\/tbody>\n      <caption>winner \ubd84\ud3ec (70 cells): suf(OFF) 36 \u00b7 van(ON) 10 \u00b7 DSA(ON) 11 \u00b7 suf(ON) 6 \u00b7 suf+dsa(ON) 5 \u00b7 van(OFF) 2. \u2014 = 405B-FP8 engine init fail.<\/caption>\n    <\/table><\/div>\n\n  <p>\uc5ec\uae30\uae4c\uc9c0\uac00 \uc694\uc57d\uc774\ub2e4. 
<span class=\"pt pt4\">suf(OFF)<\/span>\uac00 70\uc140 \uc911 36\uc140\uc758 winner \u2014\n  \uc804\ud3b8\uc758 \ucc98\ubc29\uc740 dense \ubaa8\ub378\uc5d0\uc11c\ub294 \ud06c\uae30\ub97c \uac00\ub9ac\uc9c0 \uc54a\uace0 \uc774\uc2dd\ub418\uc5c8\ub2e4.\n  \ub300\uc2e0 <strong>\ub450 \uac1c\uc758 \uc0c8 \uacbd\uacc4<\/strong>(405B-FP8\uc758 \ubd80\ud305 \ud55c\uacc4, 671B MoE\uc758 \uc804\uba74 \ud68c\uadc0)\uc640,\n  \uc124\uc815\uc774 \uc544\ub2cc <strong>\ud638\uc2a4\ud2b8 \uc0c1\ud0dc<\/strong>\uac00 \ub9cc\ub4e0 +36%\uc758 \uc815\uccb4\ub97c \ub530\uc838\uc57c \ud588\ub2e4. \ucc28\ub840\ub85c \uac04\ub2e4.<\/p>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 02 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s02\">\n  <div class=\"sec-no\">02<\/div>\n  <h2>\uc5ec\uc12f \uac1c\uc758 \uce21\uc815\uc810<\/h2>\n  <p class=\"tagline\">\uc804\ud3b8\uacfc \ubb34\uc5c7\uc774 \ub2e4\ub978\uac00. \ucd95\uc774 \uc138 \uac1c \ub298\uc5c8\ub2e4.<\/p>\n\n  <p>\uc804\ud3b8\uc774 H100\u00d78\uc5d0\uc11c \ubaa8\ub378 \ud558\ub098(+\uacbd\uacc4 \ud0d0\uc0c9\uc6a9 small model\ub4e4)\uc640 synthetic \uc6cc\ud06c\ub85c\ub4dc\ub85c \ub2f5\ud588\ub2e4\uba74,\n  \ubcf8 sweep\uc740 \uc138 \ucd95\uc744 \ubc14\uafe8\ub2e4. \ud558\ub4dc\uc6e8\uc5b4\ub294 <strong>B200\u00d78<\/strong>(sm_100, HBM3e 183 GiB), \ubaa8\ub378\uc740 <strong>7B dense\ubd80\ud130 671B MoE\uae4c\uc9c0 10\uac1c<\/strong>,\n  corpus\ub294 sonnet \uac19\uc740 \ud569\uc131 \ubd84\ud3ec \ub300\uc2e0 <strong>real trace 7\uc885<\/strong> \u2014 ShareGPT, WildChat, LMSYS, HumanEval, MBPP, SWE-Bench, \uadf8\ub9ac\uace0 \uc6b4\uc601 proxy\uc778 mix\ub2e4.<\/p>\n\n  <p>\uadf8\ub9ac\uace0 \ucd95\uc774 \ud558\ub098 \ub354 \uc788\ub2e4. 
\uce21\uc815 \ub3c4\uc911 \ud638\uc2a4\ud2b8\uc758 <strong>Intel DSA(Data Streaming Accelerator) work queue \uc0c1\ud0dc<\/strong>\uac00\n  vllm \uc124\uc815\uacfc \ubb34\uad00\ud558\uac8c \uacb0\uacfc\ub97c \ud754\ub4e0\ub2e4\ub294 \uc0ac\uc2e4\uc774 \ud655\uc778\ub418\uc5b4, \uc774\ub97c \ubcc4\ub3c4 \ucc28\uc6d0\uc73c\ub85c \ubd84\ub9ac\ud588\ub2e4.\n  \uadf8 \uacb0\uacfc\uac00 \ubaa8\ub378\u00d7corpus \ub2f9 6\uac1c\uc758 \uce21\uc815\uc810\uc774\ub2e4.<\/p>\n\n  <div class=\"tbl-scroll\"><table>\n    <thead><tr><th>ID<\/th><th>label<\/th><th>host DSA WQ<\/th><th>spec decode<\/th><th>vllm DSA env<\/th><th>\ucd9c\ucc98<\/th><\/tr><\/thead>\n    <tbody>\n      <tr><td><span class=\"pt pt1\">\u2460<\/span><\/td><td>van(OFF)<\/td><td>disabled<\/td><td>\u2014<\/td><td>\u2014<\/td><td>TSK_042 baseline<\/td><\/tr>\n      <tr><td><span class=\"pt pt2\">\u2461<\/span><\/td><td>van(ON)<\/td><td><strong>enabled<\/strong><\/td><td>\u2014<\/td><td>\u2014<\/td><td>\ubcf8 \uc2e4\ud5d8<\/td><\/tr>\n      <tr><td><span class=\"pt pt3\">\u2462<\/span><\/td><td>DSA(ON)<\/td><td>enabled<\/td><td>\u2014<\/td><td><code>VLLM_LHC_DSA=1<\/code><\/td><td>\ubcf8 \uc2e4\ud5d8<\/td><\/tr>\n      <tr><td><span class=\"pt pt4\">\u2463<\/span><\/td><td>suf(OFF)<\/td><td>disabled<\/td><td>suffix K=32<\/td><td>\u2014<\/td><td>TSK_042 baseline<\/td><\/tr>\n      <tr><td><span class=\"pt pt5\">\u2464<\/span><\/td><td>suf(ON)<\/td><td><strong>enabled<\/strong><\/td><td>suffix K=32<\/td><td>\u2014<\/td><td>\ubcf8 \uc2e4\ud5d8<\/td><\/tr>\n      <tr><td><span class=\"pt pt6\">\u2465<\/span><\/td><td>suf+dsa(ON)<\/td><td>enabled<\/td><td>suffix K=32<\/td><td><code>on<\/code><\/td><td>\ubcf8 \uc2e4\ud5d8<\/td><\/tr>\n    <\/tbody>\n    <caption>10 models \u00d7 7 corpora \u00d7 6 points = 420\uc140 \uc911 406\uc140 \uce21\uc815 \uc644\ub8cc(96.7%). 
\ubbf8\uce21\uc815 14\uc140\uc740 \uc804\ubd80 405B-FP8\uc758 \u2464\u2465(\u00a706).<\/caption>\n  <\/table><\/div>\n\n  <p>suffix \uc124\uc815\uc740 \uc804\ud3b8\uc758 dominant \uad6c\uc131\uc744 \uadf8\ub300\ub85c \uc2b9\uacc4\ud588\ub2e4 \u2014 <code>{\"method\":\"suffix\",\"num_speculative_tokens\":32}<\/code>,\n  tree depth\u00b7cache \ub4f1 \ub098\uba38\uc9c0\ub294 default. vllm DSA env(\u2462\u2465)\ub294 host\u2194pinned memcpy\ub97c DSA\ub85c \uc624\ud504\ub85c\ub4dc\ud558\ub294 lever\ub85c,\n  <code>VLLM_LHC_DSA=1 VLLM_LEVER_N9=1 VLLM_LHC_DSA_MIN=65536<\/code>(64 KiB \ubbf8\ub9cc copy\ub294 CPU \uacbd\ub85c \uc720\uc9c0)\uc774\ub2e4.\n  \ubca4\uce58\ub9c8\ud06c\ub294 real trace prompt\uc5d0 concurrency 32, max_tokens 8,192, streaming \u2014 7\uac1c corpus \uc804\ubd80 \ub3d9\uc77c harness\ub2e4.<\/p>\n\n  <figure class=\"fig-img\"><img decoding=\"async\" src=\"http:\/\/kyunam.com\/wp-content\/uploads\/2026\/06\/vllm2-figure-02.png\" alt=\"suffix speculative decoding \ub3d9\uc791 \ub2e4\uc774\uc5b4\uadf8\ub7a8 \u2014 step\ub2f9 1\ud1a0\ud070\uc5d0\uc11c K\ud1a0\ud070\uc73c\ub85c\" loading=\"lazy\"><figcaption class=\"fig-cap\">vLLM \uae30\ubcf8 \uc124\uc815\uc740 forward step\ub2f9 1\ud1a0\ud070. suffix\ub294 prompt\uc640 \uc774\ubbf8 \uc0dd\uc131\ud55c \ud1a0\ud070 \uc591\ucabd\uc744 suffix tree\ub85c \uc778\ub371\uc2f1\ud574 draft\ub97c \ubf51\uace0, \ud55c \ubc88\uc758 forward step\uc5d0\uc11c K\u0304\uac1c\ub97c \ud55c\uaebc\ubc88\uc5d0 \ud655\uc815\ud55c\ub2e4. 
\uac00\uc18d\u00b7\ud68c\uadc0\ub294 K\u0304\uac00 step overhead \ubc30\uc218 R\uc744 \ub118\ub290\ub0d0(K &gt; R)\ub85c \uac08\ub9b0\ub2e4 \u2014 \uc804\ud3b8 \u00a703.<\/figcaption><\/figure>\n\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 03 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s03\">\n  <div class=\"sec-no\">03<\/div>\n  <h2>\uc804\ud3b8\uc758 \uacb0\ub860\uc740 \uc0b4\uc544\ub0a8\uc558\ub294\uac00<\/h2>\n  <p class=\"tagline\">36\/70\uc140. \ubd80\ub4f1\uc2dd\uc740 \uc0b4\uc544\ub0a8\uc558\uace0, \uc0c1\uc218\ub294 \uc774\ub3d9\ud588\ub2e4.<\/p>\n\n  <p>70\uac1c (\ubaa8\ub378, corpus) \uc140\uc5d0\uc11c 6\uac1c \uce21\uc815\uc810 \uc911 winner\uc758 \ubd84\ud3ec\ub294 \ub2e4\uc74c\uacfc \uac19\ub2e4.<\/p>\n\n  <div class=\"tbl-scroll\"><table>\n    <thead><tr><th>winner<\/th><th class=\"num\">\uc140 \uc218<\/th><th>\ube44\uace0<\/th><\/tr><\/thead>\n    <tbody>\n      <tr><td><span class=\"pt pt4\">\u2463 suf(OFF)<\/span><\/td><td class=\"num\"><strong>36<\/strong><\/td><td>\uc804\uccb4\uc758 \uc808\ubc18 \uc774\uc0c1. 
suffix decoding\uc774 dominant lever<\/td><\/tr>\n      <tr><td><span class=\"pt pt2\">\u2461 van(ON)<\/span><\/td><td class=\"num\">11<\/td><td>\ub300\ubd80\ubd84 DS-Distill-Llama-70B\uc640 671B MoE<\/td><\/tr>\n      <tr><td><span class=\"pt pt3\">\u2462 DSA(ON)<\/span><\/td><td class=\"num\">11<\/td><td>\u2461\uc640 \ud1b5\uacc4\uc801\uc73c\ub85c \uad6c\ubd84 \uc5b4\ub824\uc6c0(\u00a705)<\/td><\/tr>\n      <tr><td><span class=\"pt pt5\">\u2464 suf(ON)<\/span><\/td><td class=\"num\">6<\/td><td rowspan=\"2\">\uc870\ud569 lever\uac00 \uc774\uae30\ub294 \uc140\uc740 \uc18c\uc218, corpus-dependent<\/td><\/tr>\n      <tr><td><span class=\"pt pt6\">\u2465 suf+dsa(ON)<\/span><\/td><td class=\"num\">4<\/td><\/tr>\n      <tr><td><span class=\"pt pt1\">\u2460 van(OFF)<\/span><\/td><td class=\"num\">2<\/td><td>DS-Distill-Llama-70B \uc77c\ubd80 corpus<\/td><\/tr>\n    <\/tbody>\n  <\/table><\/div>\n\n  <p>\uc804\ud3b8\uc758 \uccab \ubc88\uc9f8 \uacb0\ub860 \u2014 <em>\uac00\uc18d\uc758 \ub300\ubd80\ubd84\uc740 suffix decoding\uc744 \ucf1c\ub294 \uac83 \uc790\uccb4\uc5d0\uc11c \ub098\uc628\ub2e4<\/em> \u2014 \uc740 \ubaa8\ub378 10\uac1c\ub85c \ud655\uc7a5\ud574\ub3c4 \uadf8\ub300\ub85c\ub2e4.\n  \ud45c\uc900 dense \ubaa8\ub378(Qwen 7B\/32B\/72B, Llama 8B\/70B\/405B)\uc5d0\uc11c suffix\ub294 corpus\ub97c \ubd88\ubb38\ud558\uace0 +50~+232%\uc758 suf-gain\uc744 \ub9cc\ub4e4\uc5c8\uace0,\n  DSA \uacc4\uc5f4 lever\ub4e4\uc740 \uadf8 \uc704\uc5d0\uc11c \u00b110% \uc548\ucabd\uc744 \uc624\uac14\ub2e4.<\/p>\n\n  <h3>\ub2e4\ub9cc, 7B \uacbd\uacc4\ub294 \uc774\ub3d9\ud588\ub2e4<\/h3>\n\n  <p>\uc804\ud3b8\uc5d0\uc11c \uac00\uc7a5 \ub2e8\ud638\ud558\uac8c \uc37c\ub358 \ubb38\uc7a5\uc774 \ud558\ub098 \uc788\ub2e4. <em>&#8220;7B \uc774\ud558 \ubaa8\ub378\uc5d0\uc11c\ub294 \ubb34\uc5c7\uc744 \ucf1c\ub3c4 \ub290\ub824\uc9c4\ub2e4.&#8221;<\/em>\n  \ubcf8 sweep\uc5d0\uc11c \uc774 \ubb38\uc7a5\uc740 \uae68\uc84c\ub2e4. 
Qwen2.5-7B\ub294 mix\uc5d0\uc11c +87.2%, Llama-3.1-8B\ub294 +214.7%,\n  DS-Distill-Qwen-7B\ub294 +170.0% \u2014 \uc804\ubd80 suffix\uac00 \ud070 \ud3ed\uc758 net-positive\ub2e4.<\/p>\n\n  <p>\ubaa8\uc21c\ucc98\ub7fc \ubcf4\uc774\uc9c0\ub9cc, \uc804\ud3b8\uc758 mechanism\uc73c\ub85c \ub3cc\uc544\uac00\uba74 \uc624\ud788\ub824 \uc77c\uad00\uc801\uc774\ub2e4. \uac00\uc18d\u00b7\ud68c\uadc0\ub97c \uac00\ub974\ub294 \uac74 \ubaa8\ub378 \ud06c\uae30\uac00 \uc544\ub2c8\ub77c\n  \ubd80\ub4f1\uc2dd <strong>K &gt; R<\/strong> \u2014 \ud3c9\uade0 \ucc44\ud0dd \ud1a0\ud070 \uc218\uac00 spec step overhead \ubc30\uc218\ub97c \ub118\ub290\ub0d0 \u2014 \uc774\uace0,\n  \ubaa8\ub378 \ud06c\uae30\ub294 R\uc744, corpus\ub294 K\ub97c \uc6c0\uc9c1\uc774\ub294 \ubcc0\uc218\uc77c \ubfd0\uc774\ub2e4. \uc804\ud3b8\uc758 7B \ud68c\uadc0\ub294 word-salad code \uac19\uc740\n  synthetic corpus\uc5d0\uc11c K\uac00 \ubc14\ub2e5\uc774\uc5c8\ub358 \ud658\uacbd\uc758 \uacb0\ub860\uc774\uace0, \ubcf8 \uc2e4\ud5d8\uc758 real chat trace(ShareGPT\u00b7WildChat)\ub294\n  \ubc18\ubcf5 \uad6c\uc870\uac00 \ud48d\ubd80\ud574 suffix tree\uc758 K\uac00 \ucda9\ubd84\ud788 \ucee4\uc9c4\ub2e4. \ud558\ub4dc\uc6e8\uc5b4\uac00 H100\uc5d0\uc11c B200\uc73c\ub85c \ubc14\ub010 \uac83\ub3c4 \uc0c1\uc218\ub97c \uc6c0\uc9c1\uc600\uc744 \uac83\uc774\ub2e4.\n  \uc989 <strong>\ubd80\ub4f1\uc2dd\uc740 \ud658\uacbd\uc744 \uac74\ub108 \uc0b4\uc544\ub0a8\uc558\uace0, &#8220;7B&#8221;\ub77c\ub294 \uc22b\uc790\ub294 \uadf8 \ud658\uacbd\uc758 \uc0c1\uc218\uc600\ub2e4<\/strong>.\n  \uacbd\uacc4\ub97c \ubaa8\ub378 \ud06c\uae30\ub85c \uc678\uc6b0\uc9c0 \ub9d0\uace0, \uc790\uae30 corpus\uc5d0\uc11c K\ub97c \uc7ac\ub77c\ub294 \uac83\uc774 \uc218\uc815\ub41c \uad50\ud6c8\uc774\ub2e4.<\/p>\n\n  <p>corpus\ubcc4 \uacb0\uc774 \ub0a8\uc544 \uc788\ub2e4\ub294 \uc810\ub3c4 \uac19\ub2e4. DS-Distill-Qwen-7B\ub294 mix\uc5d0\uc11c +170%\uc9c0\ub9cc sharegpt\u00b7wildchat\uc5d0\uc11c\ub294\n  van(ON)\uc774 suffix\ub97c \uc774\uae34\ub2e4. 
mix \uc140\uc758 suf-gain\uc774 \uac1c\ubcc4 corpus\ubcf4\ub2e4 \uc77c\uad00\ub418\uac8c \ud070 \uac83\ub3c4 \ub208\uc5d0 \ub744\ub294 \ud328\ud134\uc778\ub370\n  (Llama-70B: per-corpus \ucd5c\ub300 6,026 vs mix 10,400), \uc774\uc9c8\uc801 \uc694\uccad\uc774 \uc11e\uc77c \ub54c global suffix tree\uc758 hit \uae30\ud68c\uac00\n  \ub298\uc5b4\ub098\ub294 \ud6a8\uacfc\ub85c \ubcf4\uc774\ub098 \ubcf8 \uc2e4\ud5d8\uc5d0\uc11c \ubd84\ub9ac \uac80\uc99d\ud558\uc9c0\ub294 \uc54a\uc558\ub2e4 \u2014 mix \uc218\uce58\ub97c \ub2e8\uc77c corpus \uc6b4\uc601\uc5d0 \uadf8\ub300\ub85c \uc678\uc0bd\ud558\uba74 \uc548 \ub41c\ub2e4(\u00a708).<\/p>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 04 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s04\">\n  <div class=\"sec-no\">04<\/div>\n  <h2>+36%\uc758 \uc815\uccb4<\/h2>\n  <p class=\"tagline\">\uc124\uc815\uc744 \ud558\ub098\ub3c4 \ubc14\uafb8\uc9c0 \uc54a\uc558\ub294\ub370, \uae30\ubcf8 \uc124\uc815 \uce21\uc815\uac12\uc774 \ube68\ub77c\uc84c\ub2e4.<\/p>\n\n  <p>\uc774\ubc88 \uce21\uc815\uc5d0\uc11c \uac00\uc7a5 \uc2dc\uac04\uc744 \uc4f4 \uac74 suffix\uac00 \uc544\ub2c8\ub77c \uc774\ucabd\uc774\ub2e4.\n  TSK_042 baseline(6\uc6d4 2\uc77c)\uacfc \ubcf8 \uc2e4\ud5d8(6\uc6d4 10\uc77c~)\uc758 vLLM \uae30\ubcf8 \uc124\uc815\uc744 \ube44\uad50\ud558\ub2c8, <strong>vllm \uc124\uc815\uc774 \ub3d9\uc77c\ud55c\ub370\ub3c4<\/strong>\n  7~8B \ubaa8\ub378\uc5d0\uc11c +33~+36%, Qwen-32B\uc5d0\uc11c\ub294 +53.6%\uc758 \ucc28\uc774\uac00 \ub0ac\ub2e4. 
\ub450 \uce21\uc815 \uc0ac\uc774\uc5d0 \ubc14\ub010 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc\ub294 \ud558\ub098 \u2014\n  6\uc6d4 8\uc77c, LHC Phase \uc791\uc5c5\uc758 \uc77c\ud658\uc73c\ub85c <code>\/sys\/bus\/dsa\/devices\/wq*\/state<\/code>\uc758\n  DSA work queue 8\uac1c(dsa0\/dsa1 \u00d7 4)\uac00 enable\ub41c \uac83\uc774\ub2e4.<\/p>\n\n  <div class=\"tbl-scroll\"><table>\n    <thead><tr><th>\uce21\uc815 \uc2dc\uc810<\/th><th>host DSA WQ<\/th><th>\ud574\ub2f9 \uce21\uc815\uc810<\/th><\/tr><\/thead>\n    <tbody>\n      <tr><td>2026-06-02 (TSK_042)<\/td><td><strong>DISABLED<\/strong><\/td><td>\u2460 van(OFF) \u00b7 \u2463 suf(OFF)<\/td><\/tr>\n      <tr><td>2026-06-08 00:40<\/td><td colspan=\"2\">wq enable (sysfs mtime \uc2e4\uce21) \u2014 LHC Phase \uc791\uc5c5<\/td><\/tr>\n      <tr><td>2026-06-10+ (\ubcf8 \uc2e4\ud5d8)<\/td><td><strong>ENABLED<\/strong><\/td><td>\u2461 \u2462 \u2464 \u2465<\/td><\/tr>\n    <\/tbody>\n  <\/table><\/div>\n\n  <figure class=\"fig-img\"><img decoding=\"async\" src=\"http:\/\/kyunam.com\/wp-content\/uploads\/2026\/06\/vllm2-figure-03.png\" alt=\"host DSA \uad6c\uc131 \ub2e4\uc774\uc5b4\uadf8\ub7a8 \u2014 dsa0\/dsa1, WQ 8\uac1c enable\" loading=\"lazy\"><figcaption class=\"fig-cap\">\ub514\ubc14\uc774\uc2a4\ub2f9 8\uac1c WQ \uc911 4\uac1c\ub9cc enable(\ub179\uc0c9), \ub098\uba38\uc9c0\ub294 size=0 disabled(\uc810\uc120). enabled WQ\ub294 mode=shared(SWQ)\u00b7type=user\ub85c ENQCMD \uc720\uc800\uacf5\uac04 \uc81c\ucd9c\uc774 \uac00\ub2a5\ud55c \uc0c1\ud0dc\uc600\ub2e4 \u2014 \uadf8\ub7ec\ub098 \uc2e4\uc81c\ub85c \uc4f0\ub294 \ud504\ub85c\uc138\uc2a4\ub294 \uc5c6\uc5c8\ub2e4\ub294 \uac83\uc774 \u2461\u2464\uc758 +36%\ub97c &#8220;DSA \ud6a8\uacfc&#8221;\ub85c \uc77d\uc744 \uc218 \uc5c6\ub294 \uc774\uc720\ub2e4.<\/figcaption><\/figure>\n\n\n  <p>\ud6a8\uacfc\ub294 method\uc5d0 \ub530\ub77c \ucc28\ub4f1\uc801\uc774\ub2e4. 
host-bound regime\uc778 <strong>vLLM \uae30\ubcf8 \uc124\uc815\uc5d0\uc11c\ub294 +33~+36%<\/strong>(\uc18c\ud615 \ubaa8\ub378 \uae30\uc900 \u2014 70B\uae09\uc740 +2~8%,\n  405B\ub294 0%\ub85c \ubaa8\ub378\uc774 \ud074\uc218\ub85d \ud76c\uc11d), step-bound regime\uc778 <strong>suffix\uc5d0\uc11c\ub294 \u22125~+10%<\/strong>\ub85c \ud6a8\uacfc\uac00 \uc5c6\uac70\ub098 \uc57d\ud55c \uc190\ud574\ub2e4.\n  \uc989 \uc774 \ud6a8\uacfc\uac00 \uc2e4\uc7ac\ud55c\ub2e4\uba74 &#8220;host memcpy\uac00 \ubcd1\ubaa9\uc778 \uad6c\uac04&#8221;\uc5d0\uc11c\ub9cc \uc791\ub3d9\ud558\ub294 lever\ub77c\ub294 \ud574\uc11d\uacfc \ubd80\ud569\ud55c\ub2e4.<\/p>\n\n  <div class=\"note\">\n    <b>\ub2e8, \uc774 +36%\ub97c &#8220;DSA\ub97c \ucf1c\uba74 \ube68\ub77c\uc9c4\ub2e4&#8221;\ub85c \uc77d\uc73c\uba74 \uc548 \ub41c\ub2e4.<\/b>\n    sweep \uc2dc\uc810 sysfs \uc2e4\uce21\uc5d0\uc11c enabled WQ 8\uac1c \uc804\ubd80 <code>clients=0<\/code> \u2014 \uc989 <strong>\uc5b4\ub5a4 \ud504\ub85c\uc138\uc2a4\ub3c4 WQ\ub97c \uc2e4\uc81c\ub85c \uc0ac\uc6a9\ud558\uace0 \uc788\uc9c0 \uc54a\uc558\ub2e4<\/strong>.\n    \ud55c\ud3b8 \u2460\u2463\uc758 \ucd9c\ucc98\uc778 TSK_042\uc640 \ubcf8 \uc2e4\ud5d8\uc740 cudagraph \uacbd\ub85c\uac00 \ub2e4\ub97c \uc218 \uc788\uc5b4(PIECEWISE \u2192 FULL_AND_PIECEWISE),\n    +36%\uc758 \uc9c4\uc9dc \uc6d0\uc778\uc774 <strong>cudagraph_mode \ucc28\uc774\ub77c\ub294 \uac00\uc124\uc774 \ud604\uc7ac \uc720\ub825<\/strong>\ud558\ub2e4(SUB_213).\n    \ubcf8 \uae00\uc5d0\uc11c ON\/OFF\ub294 &#8220;\uadf8 \uc2dc\uc810 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc&#8221;\uc758 \ub77c\ubca8\ub85c\ub9cc \uc4f0\uba70, \uc778\uacfc\ub294 \uaca9\ub9ac \uac80\uc99d(verify_dsa.sh C1\/C2\/C3) \uacb0\uacfc\ub97c \uae30\ub2e4\ub9b0\ub2e4.\n  <\/div>\n\n  <p>\uc6b4\uc601 \uad00\uc810\uc758 \ud568\uc758\ub294 \uc778\uacfc\uc640 \ubb34\uad00\ud558\uac8c \ubd84\uba85\ud558\ub2e4. 
<strong>vllm \uc778\uc790 \ubc14\uae65\uc758 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc\uac00 \uce21\uc815\uc744 \uc218\uc2ed % \ub2e8\uc704\ub85c \ud754\ub4e4 \uc218 \uc788\ub2e4<\/strong>\ub294 \uac83.\n  \uc804\ud3b8 \u00a709-6\uc5d0\uc11c &#8220;\uce21\uc815 \uc870\uac74\uc774 \ub2e4\ub978 \uac12\ub07c\ub9ac \uc9c1\uc811 \ube44\uad50\ud558\uc9c0 \ub9d0\ub77c&#8221;\uace0 \uc37c\ub294\ub370, \uadf8 \ubaa9\ub85d\uc5d0 sysfs \ub808\ubca8\uc758 \ud638\uc2a4\ud2b8 \uad6c\uc131\uae4c\uc9c0 \ub4e4\uc5b4\uac00\uc57c \ud55c\ub2e4\ub294 \uac78\n  \uc774\ubc88\uc5d0 \uce21\uc815\uc73c\ub85c \ubc30\uc6e0\ub2e4. \ucc38\uace0\ub85c \uc774 WQ \uad6c\uc131\uc740 <code>\/etc\/accel-config\/<\/code>\uc5d0 \uc800\uc7a5\ub3fc \uc788\uc9c0 \uc54a\uc740 \uc218\ub3d9 enable \uc0c1\ud0dc\ub77c\n  <strong>\uc7ac\ubd80\ud305\ud558\uba74 \uc18c\uc2e4<\/strong>\ub41c\ub2e4 \u2014 \uc7ac\ud604\uc774 \ud544\uc694\ud558\uba74 <code>accel-config save-config<\/code>\uac00 \uc120\ud589\ub3fc\uc57c \ud55c\ub2e4.<\/p>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 05 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s05\">\n  <div class=\"sec-no\">05<\/div>\n  <h2>vllm \ub808\ubca8 DSA env\ub294 \ubd80\ucc28\uc801\uc774\ub2e4<\/h2>\n  <p class=\"tagline\">\ud638\uc2a4\ud2b8\uac00 driver, env\ub294 noise.<\/p>\n\n  <p>\uadf8\ub807\ub2e4\uba74 vllm \ud504\ub85c\uc138\uc2a4\uac00 \uba85\uc2dc\uc801\uc73c\ub85c DSA lane\uc744 \uc4f0\ub3c4\ub85d \ud558\ub294 env(\u2462\u2465)\ub294 \uc5b4\ub5a4\uac00.<\/p>\n\n<pre><code>VLLM_LHC_DSA=1        <span class=\"cm\"># LHC DSA lane \ud65c\uc131 (default off)<\/span>\nVLLM_LEVER_N9=1       <span class=\"cm\"># host\u2194pinned memcpy DSA \uc624\ud504\ub85c\ub4dc<\/span>\nVLLM_LHC_DSA_MIN=65536 <span class=\"cm\"># 64 KiB 
\ubbf8\ub9cc copy\ub294 CPU \uacbd\ub85c \uc720\uc9c0<\/span><\/code><\/pre>\n\n  <figure class=\"fig-img\"><img decoding=\"async\" src=\"http:\/\/kyunam.com\/wp-content\/uploads\/2026\/06\/vllm2-figure-04.png\" alt=\"vllm DSA lane copy \uacbd\ub85c \ubd84\uae30 \ud750\ub984\ub3c4\" loading=\"lazy\"><figcaption class=\"fig-cap\"><code>VLLM_LEVER_N9=1<\/code>\uc774 \ucf1c\uc9c4 \ud504\ub85c\uc138\uc2a4\uc758 host\u2194pinned copy\ub294 64 KiB\ub97c \uae30\uc900\uc73c\ub85c DSA lane\uacfc CPU \uacbd\ub85c\ub85c \ubd84\uae30\ud55c\ub2e4. \uacbd\ub85c \uc790\uccb4\ub294 \ub3d9\uc791\ud558\uc9c0\ub9cc, throughput\uc5d0 \ub0a8\uae30\ub294 \ud754\uc801\uc740 vLLM \uae30\ubcf8 \uc124\uc815 \uc704 0%, suffix \uc704 \u00b15%\uc600\ub2e4.<\/figcaption><\/figure>\n\n\n  <p>\uacb0\uacfc\ub294 \uae54\ub054\ud558\uac8c \uc2ec\uc2ec\ud558\ub2e4. <strong>vLLM \uae30\ubcf8 \uc124\uc815 \uc704(\u2462 vs \u2461)\uc5d0\uc11c\ub294 0%<\/strong> \u2014 10\uac1c \ubaa8\ub378\uc758 mix \uae30\uc900 \u22122.2~+1.5%\ub85c noise \ubc94\uc704\ub2e4.\n  <strong>suffix \uc704(\u2465 vs \u2464)\uc5d0\uc11c\ub294 \u00b15%<\/strong> \uc548\ud30e\uc758 corpus-dependent \uc9c4\ub3d9\uc774 \uc788\uc744 \ubfd0 dominant signal\uc774 \uc5c6\ub2e4\n  (Llama-8B +9.0%\uc640 Llama-70B \u221213.8%\uac00 \uacf5\uc874\ud55c\ub2e4). 
\u00a704\uc758 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc \ud6a8\uacfc\uac00 +36%\uc600\ub358 \uac83\uacfc \ube44\uad50\ud558\uba74 \uacb0\ub860\uc740 \ud558\ub098\ub2e4 \u2014\n  <strong>\uc774 \ucd95\uc5d0\uc11c\ub294 \ud638\uc2a4\ud2b8 \uad6c\uc131\uc774 driver\uc774\uace0, vllm env\ub294 \ubd80\ucc28\uc801<\/strong>\uc774\ub2e4.\n  \u2465\uc774 winner\uc778 \uc140\uc774 70\uac1c \uc911 4\uac1c \uc788\uae34 \ud558\uc9c0\ub9cc, \uc6b4\uc601 \uc815\ucc45\uc758 default\ub85c \uc0bc\uc744 \uadfc\uac70\ub294 \ubabb \ub41c\ub2e4.<\/p>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 06 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s06\">\n  <div class=\"sec-no\">06<\/div>\n  <h2>\uc0c8\ub85c \ub9cc\ub09c \ub450 \uac1c\uc758 \uacbd\uacc4<\/h2>\n  <p class=\"tagline\">\ud558\ub098\ub294 \ubd80\ud305\uc5d0\uc11c, \ud558\ub098\ub294 \uc544\ud0a4\ud14d\ucc98\uc5d0\uc11c.<\/p>\n\n  <h3>\uacbd\uacc4 1 \u2014 Llama-405B-FP8\uc740 suffix\ub85c \ubd80\ud305\uc774 \uc548 \ub41c\ub2e4<\/h3>\n  <p>405B-FP8\uc5d0 suffix K=32\ub97c \ubd99\uc774\uba74 engine init\uc5d0\uc11c <code>num_gpu_blocks=0 \u2192 override=512<\/code> \ud6c4\n  core proc\uac00 crash\ud55c\ub2e4. 405B-FP8 + suffix K=32 + B200 \ub2e8\uc77c \ub178\ub4dc TP=8 + <code>gmu 0.85<\/code> \uc870\ud569\uc758 \ud638\ud658\uc131 \ud55c\uacc4\ub85c,\n  \u2464\u2465 14\uc140\uc774 \uc601\uad6c \ubbf8\uce21\uc815\uc73c\ub85c \ub0a8\uc558\ub2e4(406\/420\uc758 \uacb0\uc190\uc774 \uc804\ubd80 \uc5ec\uae30\ub2e4). 
\ud765\ubbf8\ub85c\uc6b4 \uac74 host-OFF \uc2dc\uc810\uc758\n  \u2463\ub294 \uba40\uca61\ud788 \uce21\uc815\ub418\uc5b4 <strong>+125.9%<\/strong>\ub77c\ub294, 9\uac1c dense \ubaa8\ub378 \uc911\uc5d0\uc11c\ub3c4 \uc0c1\uc704\uad8c\uc758 suf-gain\uc744 \ubcf4\uc600\ub2e4\ub294 \uc810\uc774\ub2e4.\n  \uc989 405B\uc5d0\uc11c suffix\ub294 &#8220;\ud6a8\uacfc\uac00 \uc5c6\ub294&#8221; \uac8c \uc544\ub2c8\ub77c &#8220;\ud604\uc7ac \ube4c\ub4dc\u00b7\uba54\ubaa8\ub9ac \uad6c\uc131\uc5d0\uc11c \ubabb \ucf1c\ub294&#8221; \uc0c1\ud0dc\ub2e4 \u2014\n  gmu\u00b7K\ub97c \ub0ae\ucd98 \uc7ac\uc2dc\ub3c4\uac00 \ud6c4\uc18d \uacfc\uc81c\ub2e4.<\/p>\n\n  <h3>\uacbd\uacc4 2 \u2014 671B MoE\uc5d0\uc11c suffix\ub294 \uc804\uba74 \ud68c\uadc0\ud55c\ub2e4<\/h3>\n  <p>DeepSeek-R1(671B MoE, active 37B)\uc740 \ubcf8 \uc2e4\ud5d8\uc5d0\uc11c <strong>suffix\uac00 7\uac1c corpus \uc804\ubd80 net-negative<\/strong>\uc778 \uc720\uc77c\ud55c \ubaa8\ub378\uc774\ub2e4.\n  mix \uae30\uc900 \u221249.2%, swebench\uc5d0\uc11c\ub294 1,474 \u2192 538 tok\/s\ub85c \uac70\uc758 1\/3\ud1a0\ub9c9\uc774\ub2e4. dense 405B\uac00 +126%\uc778 \uac83\uacfc \uc815\ud655\ud788 \ub300\ube44\ub41c\ub2e4.\n  \uba54\ucee4\ub2c8\uc998 \uaddc\uba85\uc740 \ubcf8 \uc2e4\ud5d8\uc758 \ubc94\uc704 \ubc16\uc774\ub77c \ub2e8\uc815\ud558\uc9c0 \uc54a\uaca0\ub2e4 \u2014 \uad00\uce21 \uc0ac\uc2e4\ub85c\uc11c \uae30\ub85d\ud558\ub294 \uac83\uc740,\n  <strong>&#8220;\ud06c\uba74 \ud074\uc218\ub85d spec decoding\uc774 \uc720\ub9ac\ud558\ub2e4&#8221;\ub294 dense\uc5d0\uc11c\uc758 \uc9c1\uad00\uc774 MoE\uc5d0\ub294 \uc774\uc2dd\ub418\uc9c0 \uc54a\ub294\ub2e4<\/strong>\ub294 \uc810\uc774\ub2e4.\n  \uc804\ud3b8\uc5d0\uc11c same-size cross-vendor \ucc28\uc774(Llama 70B vs Qwen 72B)\ub97c &#8220;architecture\uac00 spec \ud6a8\uacfc\ub97c \uac00\ub978\ub2e4&#8221;\ub294 \uc2e0\ud638\ub85c \uc77d\uc5c8\ub294\ub370,\n  MoE\ub294 \uadf8 \uc2e0\ud638\uc758 \uadf9\ub2e8\uac12\uc778 \uc148\uc774\ub2e4. 
671B MoE\uc758 \uc6b4\uc601 \uc815\ub2f5\uc740 \ud604\uc7ac\ub85c\uc120 vLLM \uae30\ubcf8 \uc124\uc815\uc774\ub2e4.<\/p>\n\n  <figure class=\"fig-img\"><img decoding=\"async\" src=\"http:\/\/kyunam.com\/wp-content\/uploads\/2026\/06\/vllm2-figure-05.png\" alt=\"DeepSeek-R1 671B MoE\uc758 corpus\ubcc4 suffix \uc804\uba74 \ud68c\uadc0 \ucc28\ud2b8\" loading=\"lazy\"><figcaption class=\"fig-cap\">\uac19\uc740 host state(OFF)\uc5d0\uc11c 7\uac1c corpus \uc804\ubd80 suffix\uac00 vLLM \uae30\ubcf8 \uc124\uc815\uc744 \ubc11\ub3c8\ub2e4 \u2014 \ucd5c\ub300 \u221263.5%(swebench). dense 405B\uc758 +125.9%\uc640 \uc815\ud655\ud788 \ub300\ube44\ub418\ub294, \ubcf8 \uc2e4\ud5d8 \ud1b5\ud2c0\uc5b4 \uc720\uc77c\ud55c \uc804\uba74 \ud68c\uadc0.<\/figcaption><\/figure>\n\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 07 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s07\">\n  <div class=\"sec-no\">07<\/div>\n  <h2>\uc2e4\uc804 \uc801\uc6a9 \u2014 production decision tree, \uac31\uc2e0\ud310<\/h2>\n  <p class=\"tagline\">\uc804\ud3b8\uc758 \ud2b8\ub9ac\uc5d0 \ud589\uc744 \ucd94\uac00\ud558\uace0, \ud55c \ud589\uc744 \uc218\uc815\ud55c\ub2e4.<\/p>\n\n  <div class=\"tbl-scroll\"><table>\n    <thead><tr><th>\ubaa8\ub378\uad70<\/th><th>\uad8c\uc7a5 lever<\/th><th>\uadfc\uac70 (mix, vs van \ub3d9\uc77c host state)<\/th><\/tr><\/thead>\n    <tbody>\n      <tr><td>\ud45c\uc900 dense 7B~405B<br><span style=\"color:var(--muted);font-size:12.5px\">Qwen 7\/32\/72B \u00b7 Llama 8\/70\/405B<\/span><\/td>\n          <td><span class=\"pt pt4\">\u2463 suf(OFF)<\/span> \u2014 suffix K=32<\/td>\n          <td class=\"num\">+50 ~ +232%<\/td><\/tr>\n      <tr><td>reasoning distill 32B<br><span 
style=\"color:var(--muted);font-size:12.5px\">DS-Distill-Qwen-32B<\/span><\/td>\n          <td><span class=\"pt pt6\">\u2465 suf+dsa(ON)<\/span><\/td>\n          <td>mix 9,240\uc73c\ub85c \ucd5c\uace0\uce58 \u2014 \ub2e8 corpus-dependent, \ubd84\ub9ac \uce21\uc815 \ud6c4 \uc801\uc6a9<\/td><\/tr>\n      <tr><td>reasoning distill 70B<br><span style=\"color:var(--muted);font-size:12.5px\">DS-Distill-Llama-70B<\/span><\/td>\n          <td><span class=\"pt pt5\">\u2464 suf(ON)<\/span><\/td>\n          <td>mix +90%, \ub2e8 \uac1c\ubcc4 corpus\uc5d0\uc11c\ub294 vLLM \uae30\ubcf8 \uc124\uc815\uacfc \ubc31\uc911 \u2014 mix\uc131 \ud2b8\ub798\ud53d\uc77c \ub54c\ub9cc<\/td><\/tr>\n      <tr><td>671B MoE<br><span style=\"color:var(--muted);font-size:12.5px\">DeepSeek-R1<\/span><\/td>\n          <td><strong>vLLM \uae30\ubcf8 \uc124\uc815 \u2014 suffix \uae08\uc9c0<\/strong><\/td>\n          <td class=\"num neg\">suffix \uc2dc \u221249 ~ \u221253%<\/td><\/tr>\n      <tr><td>405B-FP8 + suffix<\/td>\n          <td>\ud604 \ube4c\ub4dc\uc5d0\uc11c \ubd80\ud305 \ubd88\uac00<\/td>\n          <td>gmu\/K \ud558\ud5a5 \uc7ac\uc2dc\ub3c4 \uc804\uae4c\uc9c0 \u2463 \uad6c\uc131\uc740 host-OFF baseline \uc218\uce58\ub85c\ub9cc \ucc38\uace0<\/td><\/tr>\n    <\/tbody>\n  <\/table><\/div>\n\n  <p>\uc804\ud3b8 \ub300\ube44 \uc218\uc815 \uc0ac\ud56d\uc740 \ub450 \uac00\uc9c0\ub2e4. \uccab\uc9f8, <strong>&#8220;\u22647B\ub294 vLLM \uae30\ubcf8 \uc124\uc815&#8221; \ud589\uc744 \uc0ad\uc81c<\/strong>\ud55c\ub2e4 \u2014 real trace corpus\uc5d0\uc11c\ub294\n  7~8B\ub3c4 suffix\uac00 \ubd84\uba85\ud55c net-positive\ub2e4(\u00a703). 
\ub458\uc9f8, <strong>&#8220;MoE\ub294 \ubcc4\ub3c4 \uac80\uc99d \uc804\uae4c\uc9c0 vLLM \uae30\ubcf8 \uc124\uc815&#8221; \ud589\uc744 \ucd94\uac00<\/strong>\ud55c\ub2e4.\n  \ubd80\ud305 \uc778\uc790\ub294 \uc804\ud3b8\uacfc \ub3d9\uc77c \uace8\uaca9\uc774\uace0, \ubcf8 \uc2e4\ud5d8\uc5d0\uc11c \uc4f4 \ud615\ud0dc\ub294 \uc774\ub807\ub2e4.<\/p>\n\n<pre><code><span class=\"cm\"># server boot (sweep_corpus.sh)<\/span>\n--speculative-config '{\"method\":\"suffix\",\"num_speculative_tokens\":32}'\n<span class=\"cm\"># gpu_memory_utilization=0.85, max_model_len=16384, cudagraph FULL_AND_PIECEWISE<\/span>\n\n<span class=\"cm\"># \uacf5\ud1b5 env<\/span>\nexport ARCTIC_INFERENCE_ENABLED=0 VLLM_PLUGINS=\"\"\nexport PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\nexport VLLM_NGRAM_NUM_THREADS_CAP=8 VLLM_NGRAM_DIVIDE_BY_TP=0<\/code><\/pre>\n\n  <p>vllm DSA env(\u2462\u2465\uc6a9 <code>VLLM_LHC_DSA=1 ...<\/code>)\ub294 \u00a705\uc758 \uacb0\ub860\ub300\ub85c default-off\ub97c \uad8c\uc7a5\ud55c\ub2e4.<\/p>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 08 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s08\">\n  <div class=\"sec-no\">08<\/div>\n  <h2>\ud68c\ud53c\ud574\uc57c \ud560 \ud568\uc815\ub4e4<\/h2>\n  <p class=\"tagline\">\uc774\ubc88 \uc2e4\ud5d8\uc5d0\uc11c \uc0c8\ub85c \ucd94\uac00\ub41c \ubaa9\ub85d.<\/p>\n\n  <ol>\n    <li><strong>\uc2dc\uc810\uc774 \ub2e4\ub978 \uce21\uc815\uc744 \uac19\uc740 baseline\uc73c\ub85c \ud569\uce58\uae30.<\/strong> vllm \uc778\uc790\uac00 \ub3d9\uc77c\ud574\ub3c4 \ud638\uc2a4\ud2b8 sysfs \uc0c1\ud0dc(DSA WQ)\ub098\n    cudagraph \uacbd\ub85c\uac00 \ub2e4\ub974\uba74 vLLM \uae30\ubcf8 \uc124\uc815\ubd80\ud130 +36% \uc5b4\uae0b\ub09c\ub2e4. 
baseline \uc7ac\uc0ac\uc6a9 \uc804\uc5d0 \ud638\uc2a4\ud2b8 \uad6c\uc131\uc758 diff\ub97c \ubc15\uc81c\ud558\ub77c.<\/li>\n    <li><strong>671B MoE\uc5d0 dense\uc758 \uc9c1\uad00\uc73c\ub85c suffix\ub97c \ucf1c\uae30.<\/strong> 7\uac1c corpus \uc804\uc6d0 \ud68c\uadc0, \ucd5c\ub300 \u221263.5%(swebench). dense\uc5d0\uc11c\uc758 +232%\uc640 \uac19\uc740 \uc124\uc815\uc774\ub2e4.<\/li>\n    <li><strong>405B-FP8\uc5d0 suffix K=32\ub97c \uadf8\ub300\ub85c \ubd99\uc774\uae30.<\/strong> engine init \ub2e8\uacc4\uc5d0\uc11c crash. gmu 0.85 \uae30\uc900\uc774\uba70, \ucf1c\uae30 \uc804\uc5d0 KV block \uc218\ubd80\ud130 \ud655\uc778.<\/li>\n    <li><strong>vllm DSA env\uc5d0 \uae30\ub300\ub97c \uac78\uae30.<\/strong> vLLM \uae30\ubcf8 \uc124\uc815 \uc704 0%, suffix \uc704 \u00b15% noise. \uc774 \ucd95\uc758 driver\ub294 \ud638\uc2a4\ud2b8 \uad6c\uc131\uc774\ub2e4.<\/li>\n    <li><strong>\ud638\uc2a4\ud2b8 DSA WQ \uad6c\uc131\uc744 \uc601\uc18d\uc774\ub77c\uace0 \ubbff\uae30.<\/strong> <code>accel-config save-config<\/code> \uc5c6\uc774\ub294 \uc7ac\ubd80\ud305 \uc2dc \uc18c\uc2e4 \u2014 \uc5b4\ub290 \ub0a0 \uac11\uc790\uae30 &#8220;\uc131\ub2a5\uc774 \uc608\uc804\uacfc \ub2e4\ub978&#8221; \ubbf8\uc2a4\ud130\ub9ac\uc758 \uc528\uc557\uc774 \ub41c\ub2e4.<\/li>\n    <li><strong>mix corpus \uc218\uce58\ub97c \ub2e8\uc77c corpus \uc6b4\uc601\uc5d0 \uc678\uc0bd\ud558\uae30.<\/strong> mix\uc758 suf-gain\uc740 \uac1c\ubcc4 corpus\ubcf4\ub2e4 \uc77c\uad00\ub418\uac8c \ud06c\ub2e4. 
\uc790\uae30 \ud2b8\ub798\ud53d \ubd84\ud3ec\ub85c \ubd84\ub9ac \uce21\uc815\uc774 \uba3c\uc800\ub2e4.<\/li>\n  <\/ol>\n<\/section>\n\n<!-- \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 09 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 -->\n<section id=\"s09\">\n  <div class=\"sec-no\">09<\/div>\n  <h2>\uc815\ub9ac\ud558\uba74<\/h2>\n  <p class=\"tagline\">\uc138 \uc904\ub85c.<\/p>\n\n  <p>\uccab\uc9f8, <strong>\uc804\ud3b8\uc758 \ucc98\ubc29\uc740 dense \ubaa8\ub378\uc5d0\uc11c\ub294 \ud06c\uae30\ub97c \uac00\ub9ac\uc9c0 \uc54a\uace0 \uc774\uc2dd\ub41c\ub2e4<\/strong>.\n  suffix K=32 \ud558\ub098\ub85c 7B\ubd80\ud130 405B\uae4c\uc9c0 +50~+232%, 70\uc140 \uc911 36\uc140\uc758 winner.\n  \uc804\ud3b8\uc758 &#8220;7B \uacbd\uacc4&#8221;\ub294 \ubaa8\ub378\uc758 \uc18d\uc131\uc774 \uc544\ub2c8\ub77c \uadf8 corpus\u00b7\ud558\ub4dc\uc6e8\uc5b4\uc758 \uc0c1\uc218\uc600\uace0, K &gt; R \ubd80\ub4f1\uc2dd\ub9cc\uc774 \ud658\uacbd\uc744 \uac74\ub108 \uc0b4\uc544\ub0a8\uc558\ub2e4.<\/p>\n\n  <p>\ub458\uc9f8, <strong>\uc124\uc815 \ubc14\uae65\uc758 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc\uac00 \uce21\uc815\uc744 \ud754\ub4e0\ub2e4<\/strong>. 
DSA WQ enable \uc804\ud6c4\ub85c vLLM \uae30\ubcf8 \uc124\uc815\uc774 +33~+36% \ucc28\uc774 \ub0ac\uc9c0\ub9cc\n  <code>clients=0<\/code>\uc774 \ud655\uc778\ub418\uc5b4 cudagraph mode \ucc28\uc774\uac00 \uc9c4\ubc94\uc774\ub77c\ub294 \uac00\uc124\uc774 \uc720\ub825\ud558\ub2e4 \u2014 \uc778\uacfc\ub294 \uaca9\ub9ac \uac80\uc99d\uc73c\ub85c \uac00\ub9b0\ub2e4.\n  \ud655\uc2e4\ud55c \uad50\ud6c8\uc740 \ud558\ub098\ub2e4: baseline\uc744 \uc7ac\uc0ac\uc6a9\ud558\ub824\uba74 vllm \uc778\uc790\ub9cc\uc774 \uc544\ub2c8\ub77c \ud638\uc2a4\ud2b8 \uad6c\uc131\uae4c\uc9c0 \uac19\uc544\uc57c \ud55c\ub2e4.<\/p>\n\n  <p>\uc14b\uc9f8, <strong>\uc544\ud0a4\ud14d\ucc98\uac00 \uc0c8 \uacbd\uacc4\ub97c \ub9cc\ub4e0\ub2e4<\/strong>. dense 405B\ub294 +126%\uc778\ub370 671B MoE\ub294 7\uac1c corpus \uc804\ubd80 \ud68c\uadc0(mix \uae30\uc900 \u221249%).\n  &#8220;\ud074\uc218\ub85d spec\uc774 \uc720\ub9ac\ud558\ub2e4&#8221;\ub294 dense\uc758 \uc9c1\uad00\uc740 MoE \uc55e\uc5d0\uc11c \uba48\ucd98\ub2e4. vllm \ub808\ubca8 DSA env\ub294 \uc5b4\ub290 \ucabd\uc5d0\uc11c\ub3c4 \ubd80\ucc28\uc801\uc774\uc5c8\ub2e4.<\/p>\n\n  <div class=\"closing\">\n    <span class=\"k\">CLOSING<\/span>\n    \uc804\ud3b8\uc758 \ubb38\uc7a5\uc744 \ud55c \ubc88 \ub354 \uc4f4\ub2e4 \u2014 vLLM\uc740 \uc774\ubbf8 \ube60\ub974\ub2e4. 
\uc774\ubc88\uc5d0 \ud655\uc778\ud55c \uac74 \uadf8 \ubb38\uc7a5\uc758 <em>\uc801\uc6a9 \ubc94\uc704<\/em>\ub2e4.\n    \ub0b4\uc7a5 \uc124\uc815\uc758 \uac00\uc18d\uc740 \ubaa8\ub378 \ud558\ub098\uc758 \ud589\uc6b4\uc774 \uc544\ub2c8\ub77c dense \uc77c\ubc18\uc758 \uc131\uc9c8\uc774\uc5c8\uace0,\n    \uadf8 \ubc94\uc704\uc758 \ub05d(MoE, \uadf8\ub9ac\uace0 vllm \ubc14\uae65\uc758 \ud638\uc2a4\ud2b8 \uc0c1\ud0dc)\uc774 \uc5b4\ub514\uc778\uc9c0\ub97c 406\uc140\uc774 \uaca9\uc790\ub85c \ucc44\uc6cc \ub123\uc5c8\ub2e4.\n  <\/div>\n\n  <p class=\"env\">\n    \uce21\uc815 \ud658\uacbd: DGX B200 &#8211; NVIDIA B200\u00d78 (sm_100, HBM3e 183 GiB, NVLink5) \u00b7 Intel Xeon Platinum 8570 \u00d72 (224 thread, AMX\/AVX-512\/DSA) \u00b7\n    vLLM 1.7.dev (sm_100 \ube4c\ub4dc) \u00b7 cudagraph FULL_AND_PIECEWISE \u00b7 gmu 0.85 \u00b7 max_model_len 16,384 \u00b7\n    real-trace prompts, concurrency 32, max_tokens 8,192, streaming.<br>\n    10\uac1c \ubaa8\ub378(Qwen2.5 7\/32\/72B \u00b7 DS-R1-Distill 7\/32\/70B \u00b7 Llama-3.1 8\/70\/405B-FP8 \u00b7 DeepSeek-R1) \u00d7 7\uac1c corpus\n    (sharegpt \u00b7 wildchat \u00b7 lmsys \u00b7 humaneval \u00b7 mbpp \u00b7 swebench \u00b7 mix) \u00d7 6\uac1c \uce21\uc815\uc810 = 406\/420\uc140.\n    TSK_042 baseline(\u2460\u2463): 2026-06-02 \u00b7 \ubcf8 \uc2e4\ud5d8(\u2461\u2462\u2464\u2465): 2026-06-10+.\n  <\/p>\n\n  \n\n<\/section>\n\n<\/div>\n<\/body>\n<\/html>\n","protected":false},"excerpt":{"rendered":"<p>\uadf8 \uc124\uc815, \ub2e4\ub978 \ubaa8\ub378\uc5d0\uc11c\ub3c4 \ud1b5\ud55c\ub2e4. \uac70\uc758. \u2014 vLLM Performance \u00b7 Engineering Notes vLLM Performance \u00b7 Engineering Notes \u2014 2\ud3b8 \uadf8 \uc124\uc815, \ub2e4\ub978 \ubaa8\ub378\uc5d0\uc11c\ub3c4 \ud1b5\ud55c\ub2e4. \uac70\uc758. 
\uc804\ud3b8\uc5d0\uc11c \uc815\ub9ac\ud55c suffix \uad6c\uc131\uc744 10\uac1c<span class=\"more-button\"><a href=\"http:\/\/kyunam.com\/?p=967\" class=\"more-link\">\uc77d\uc5b4 \ubcfc\uae4c?<span class=\"screen-reader-text\">\uc5b4\ub5a4 \uc124\uc815\uc73c\ub85c vLLM\uc744 \ube60\ub974\uac8c \ud560 \uac83\uc778\uac00?<\/span><\/a><\/span><\/p>\n","protected":false},"author":1,"featured_media":971,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[309,44,310],"tags":[354,356,349,352,347,351,350,355,316,348,353,306],"class_list":["post-967","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ai","category-tech","category-vllm","tag-deepseek-r1","tag-gpu-","tag-intel-dsa","tag-llama-3-1","tag-llm--","tag-moe","tag-nvidia-b200","tag-qwen2-5","tag-speculative-decoding","tag-suffix-decoding","tag-throughput-","tag-vllm"],"_links":{"self":[{"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/posts\/967","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/kyunam.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=967"}],"version-history":[{"count":1,"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/posts\/967\/revisions"}],"predecessor-version":[{"id":973,"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/posts\/967\/revisions\/973"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/kyunam.com\/index.php?rest_route=\/wp\/v2\/media\/971"}],"wp:attachment":[{"href":"http:\/\/kyunam.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=967"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/kyunam.com\/index.
php?rest_route=%2Fwp%2Fv2%2Fcategories&post=967"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/kyunam.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=967"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}