{"id":48909,"date":"2026-03-15T11:23:04","date_gmt":"2026-03-15T03:23:04","guid":{"rendered":"https:\/\/www.geobok.com\/geo-toolkit\/chunk-simulator\/"},"modified":"2026-04-03T11:04:44","modified_gmt":"2026-04-03T03:04:44","slug":"chunk-simulator","status":"publish","type":"page","link":"https:\/\/www.geobok.com\/en\/geo-toolkit\/chunk-simulator\/","title":{"rendered":"Chunk Simulator"},"content":{"rendered":"\n<style>\n\/* ===== B3 \u7279\u6709\u6837\u5f0f ===== *\/\n.cs-params { display: flex; gap: 16px; align-items: flex-end; flex-wrap: wrap; margin-bottom: 20px; padding: 20px; background: var(--geo-bg-page); border-radius: var(--geo-radius-md); border: 1px solid var(--geo-border); }\n.cs-param { display: flex; flex-direction: column; gap: 4px; }\n.cs-param label { font-size: 12px; font-weight: 700; color: var(--geo-text-muted); text-transform: uppercase; letter-spacing: 0.5px; }\n.cs-param input[type=\"range\"] { width: 160px; accent-color: var(--geo-primary); }\n.cs-param-val { font-size: 14px; font-weight: 700; color: var(--geo-primary); min-width: 50px; }\n\n\/* \u7edf\u8ba1\u680f *\/\n.cs-stats { display: flex; gap: 20px; flex-wrap: wrap; margin-bottom: 20px; }\n.cs-stat { display: flex; align-items: baseline; gap: 6px; font-size: 14px; color: var(--geo-text-muted); }\n.cs-stat__val { font-size: 20px; font-weight: 800; color: var(--geo-text-primary); }\n\n\/* \u5207\u7247\u5361\u7247 *\/\n.cs-chunk { border: 1px solid var(--geo-border); border-radius: var(--geo-radius-md); margin-bottom: 10px; overflow: hidden; transition: box-shadow 0.15s; }\n.cs-chunk:hover { box-shadow: var(--geo-shadow-md); }\n.cs-chunk__head { display: flex; align-items: center; justify-content: space-between; padding: 10px 16px; background: var(--geo-bg-page); border-bottom: 1px solid var(--geo-border); }\n.cs-chunk__id { font-size: 13px; font-weight: 700; color: var(--geo-primary); font-family: var(--geo-font-mono); }\n.cs-chunk__meta { display: flex; gap: 10px; font-size: 12px; 
color: var(--geo-text-faint); }\n.cs-chunk__body { padding: 14px 16px; font-size: 14px; color: var(--geo-text-body); line-height: 1.7; white-space: pre-wrap; word-break: break-all; }\n\n\/* \u8bed\u4e49\u5339\u914d\u6807\u7b7e *\/\n.cs-sem-tag { font-size: 11px; padding: 2px 8px; border-radius: 4px; font-weight: 700; }\n.cs-sem-high { background: var(--geo-success-bg); color: #166534; }\n.cs-sem-mid { background: var(--geo-warn-bg); color: #92400e; }\n.cs-sem-low { background: var(--geo-danger-bg); color: #991b1b; }\n\n\/* \u8fb9\u754c\u95ee\u9898 *\/\n.cs-issue { display: flex; align-items: flex-start; gap: 10px; padding: 10px 14px; border-radius: var(--geo-radius-sm); border: 1px solid var(--geo-warn-border); background: var(--geo-warn-bg); margin-bottom: 6px; font-size: 13px; color: #92400e; }\n.cs-issue__tag { font-weight: 700; white-space: nowrap; flex-shrink: 0; }\n\n@media (max-width: 768px) {\n    .cs-params { flex-direction: column; align-items: stretch; }\n    .cs-param input[type=\"range\"] { width: 100%; }\n}\n<\/style>\n\n<div class=\"geo-card\">\n    <div class=\"geo-hero\">\n        <h2 class=\"geo-hero__title\">\ud83d\udd2a Chunk Simulator<\/h2>\n        <p class=\"geo-hero__desc\">Simulate how AI splits your content into chunks. Adjust parameters and spot sentence breaks and pronoun reference issues.<\/p>\n    \n<div class=\"geo-about\">\n    <details class=\"geo-about__section\">\n        <summary>\ud83d\udcd6 What does this tool measure?<\/summary>\n        <div class=\"geo-about__body\">\n            <p>AI doesn't read your article top to bottom \u2014 it reads individual chunks carved out by the RAG system. This tool simulates the carving: adjust chunk size and overlap, watch where boundaries land, and catch problems before AI does.<\/p>\n            <p class=\"geo-about__ref\">See <em>Make AI Speak for You: The Definitive Guide to GEO<\/em>, Ch. 
3.4<\/p>\n        <\/div>\n    <\/details>\n    <details class=\"geo-about__section\">\n        <summary>\u2753 FAQ: GEO Impact<\/summary>\n        <div class=\"geo-about__body\">\n            <div class=\"geo-faq-item\"><h4>Why does my content get chunked?<\/h4><p>RAG systems retrieve individual chunks, not whole pages. HTML headings (H2, H3) are common split points.<\/p><\/div>\n            <div class=\"geo-faq-item\"><h4>A chunk boundary splits my sentence \u2014 how do I fix it?<\/h4><p>Use H2\/H3 headings as natural split points. Keep each section self-contained and within a comfortable extraction length.<\/p><\/div>\n            <div class=\"geo-faq-item\"><h4>Why are pronouns a problem inside chunks?<\/h4><p>Once extracted alone, pronouns point to nothing. Every chunk should be self-contained \u2014 understandable without surrounding context. That's GEO writing rule #1.<\/p><\/div>\n        <\/div>\n    <\/details>\n<\/div>\n<script type=\"application\/ld+json\">\n{\n  \"@context\": \"https:\/\/schema.org\",\n  \"@type\": \"FAQPage\",\n  \"mainEntity\": [\n    {\n      \"@type\": \"Question\",\n      \"name\": \"Why does my content get chunked?\",\n      \"acceptedAnswer\": {\n        \"@type\": \"Answer\",\n        \"text\": \"RAG systems retrieve individual chunks, not whole pages. HTML headings (H2, H3) are common split points.\"\n      }\n    },\n    {\n      \"@type\": \"Question\",\n      \"name\": \"A chunk boundary splits my sentence \u2014 how do I fix it?\",\n      \"acceptedAnswer\": {\n        \"@type\": \"Answer\",\n        \"text\": \"Use H2\/H3 headings as natural split points. Keep each section self-contained and within a comfortable extraction length.\"\n      }\n    },\n    {\n      \"@type\": \"Question\",\n      \"name\": \"Why are pronouns a problem inside chunks?\",\n      \"acceptedAnswer\": {\n        \"@type\": \"Answer\",\n        \"text\": \"Once extracted alone, pronouns point to nothing. 
Every chunk should be self-contained \u2014 understandable without surrounding context. That's GEO writing rule #1.\"\n      }\n    }\n  ]\n}\n<\/script>\n\n<\/div>\n\n    <div class=\"geo-action\">\n        <div class=\"geo-form-grid\">\n            <div class=\"geo-field\">\n                <label class=\"geo-label\">\u5185\u5bb9\u6765\u6e90 <span class=\"geo-label--hint\">\uff08\u7c98\u8d34\u6587\u672c\u6216\u8f93\u5165 URL\uff0c\u4e8c\u9009\u4e00\uff09<\/span><\/label>\n                <input type=\"url\" id=\"cs-url\" class=\"geo-input\" placeholder=\"\u8f93\u5165 URL \u81ea\u52a8\u63d0\u53d6\u6587\u672c...\" style=\"margin-bottom:8px;\">\n                <textarea id=\"cs-text\" class=\"geo-input\" style=\"min-height:140px;\" placeholder=\"\u6216\u76f4\u63a5\u7c98\u8d34\u6587\u672c\u5185\u5bb9...\"><\/textarea>\n            <\/div>\n            <div class=\"geo-field\">\n                <label class=\"geo-label\">\u76ee\u6807\u67e5\u8be2\u8bcd <span class=\"geo-label--hint\">\uff08\u53ef\u9009\uff0c\u586b\u5199\u540e\u8ba1\u7b97\u6bcf\u4e2a\u5207\u7247\u7684\u8bed\u4e49\u5339\u914d\u5ea6\uff09<\/span><\/label>\n                <input type=\"text\" id=\"cs-query\" class=\"geo-input\" placeholder=\"\u4f8b\u5982\uff1a\u5bb6\u7528\u6295\u5f71\u4eea\u600e\u4e48\u9009\">\n            <\/div>\n        <\/div>\n\n        <div class=\"cs-params\">\n            <div class=\"cs-param\">\n                <label>\u5207\u7247\u5927\u5c0f (chunk_size)<\/label>\n                <div style=\"display:flex;align-items:center;gap:8px;\">\n                    <input type=\"range\" id=\"cs-size\" min=\"100\" max=\"2000\" step=\"50\" value=\"500\">\n                    <span class=\"cs-param-val\" id=\"cs-size-val\">500<\/span>\n                <\/div>\n            <\/div>\n            <div class=\"cs-param\">\n                <label>\u91cd\u53e0\u5927\u5c0f (overlap)<\/label>\n                <div style=\"display:flex;align-items:center;gap:8px;\">\n                    <input 
type=\"range\" id=\"cs-overlap\" min=\"0\" max=\"200\" step=\"10\" value=\"50\">\n                    <span class=\"cs-param-val\" id=\"cs-overlap-val\">50<\/span>\n                <\/div>\n            <\/div>\n            <button id=\"cs-btn\" class=\"geo-btn\">\u6267\u884c\u5207\u7247<\/button>\n        <\/div>\n    <\/div>\n\n    <div id=\"cs-result\" class=\"geo-result\" style=\"padding: 0 var(--geo-space-xl) var(--geo-space-xl);\"><\/div>\n<\/div>\n\n<script>\ndocument.addEventListener('DOMContentLoaded', function(){\n    \/* \u6ed1\u5757\u5b9e\u65f6\u663e\u793a *\/\n    var sizeSlider = document.getElementById('cs-size');\n    var overlapSlider = document.getElementById('cs-overlap');\n    sizeSlider.addEventListener('input', function(){ document.getElementById('cs-size-val').textContent = this.value; });\n    overlapSlider.addEventListener('input', function(){ document.getElementById('cs-overlap-val').textContent = this.value; });\n\n    var btn = document.getElementById('cs-btn');\n    var result = document.getElementById('cs-result');\n\n    btn.addEventListener('click', async function(){\n        var text = document.getElementById('cs-text').value.trim();\n        var url = document.getElementById('cs-url').value.trim();\n        var query = document.getElementById('cs-query').value.trim();\n        if(!text && !url){ GeoAPI.showError(result, '\u8bf7\u7c98\u8d34\u6587\u672c\u6216\u8f93\u5165 URL'); return; }\n\n        GeoAPI.disableBtn(btn, GeoAPI.t('btn.analyzing'));\n        GeoAPI.showLoading(result, GeoAPI.t('chunk.loading'));\n\n        try {\n            var data = await GeoAPI.post('\/geo\/chunk-simulate', {\n                text: text, url: url, query: query,\n                chunk_size: parseInt(sizeSlider.value),\n                chunk_overlap: parseInt(overlapSlider.value)\n            });\n            renderChunks(data);\n        } catch(e) {\n            if(!e._geoAuthHandled) GeoAPI.showError(result, e.message);\n        } finally { 
GeoAPI.enableBtn(btn, '\u6267\u884c\u5207\u7247'); }\n    });\n\n    function renderChunks(data) {\n        var html = '';\n\n        \/* \u7edf\u8ba1\u680f *\/\n        html += '<div class=\"cs-stats\">';\n        html += '<div class=\"cs-stat\"><span class=\"cs-stat__val\">'+data.total_chunks+'<\/span> \u4e2a\u5207\u7247<\/div>';\n        html += '<div class=\"cs-stat\"><span class=\"cs-stat__val\">'+GeoAPI.formatNumber(data.total_tokens)+'<\/span> \u603b Tokens<\/div>';\n        html += '<div class=\"cs-stat\"><span class=\"cs-stat__val\">'+data.avg_tokens_per_chunk+'<\/span> Tokens\/\u5207\u7247<\/div>';\n        html += '<div class=\"cs-stat\"><span class=\"cs-stat__val\">'+GeoAPI.formatNumber(data.original_length)+'<\/span> \u539f\u6587\u5b57\u7b26<\/div>';\n        if(data.boundary_issue_count > 0) {\n            html += '<div class=\"cs-stat\"><span class=\"cs-stat__val\" style=\"color:var(--geo-warn);\">'+data.boundary_issue_count+'<\/span> \u8fb9\u754c\u95ee\u9898<\/div>';\n        }\n        html += '<\/div>';\n\n        \/* \u8fb9\u754c\u95ee\u9898\u8b66\u544a *\/\n        if(data.boundary_issues && data.boundary_issues.length > 0) {\n            html += '<div class=\"geo-section\"><h3 class=\"geo-section__title\">\u26a0\ufe0f \u5207\u7247\u8fb9\u754c\u95ee\u9898<\/h3><span class=\"geo-section__desc\">\u8fd9\u4e9b\u95ee\u9898\u53ef\u80fd\u5bfc\u81f4 AI \u7406\u89e3\u4e0d\u5b8c\u6574<\/span><\/div>';\n            data.boundary_issues.forEach(function(issue){\n                html += '<div class=\"cs-issue\"><span class=\"cs-issue__tag\">Chunk #'+issue.chunk_id+'<\/span>'+GeoAPI.escapeHtml(issue.detail)+'<\/div>';\n            });\n        }\n\n        \/* \u5207\u7247\u5217\u8868 *\/\n        html += '<div class=\"geo-section\"><h3 class=\"geo-section__title\">\ud83d\udce6 \u5207\u7247\u7ed3\u679c<\/h3><span class=\"geo-section__desc\">\u53c2\u6570 chunk_size='+data.chunk_size+' overlap='+data.chunk_overlap+'<\/span><\/div>';\n\n        (data.chunks || 
[]).forEach(function(c){\n            var semHtml = '';\n            if(c.semantic_score !== undefined) {\n                var semCls = c.semantic_score >= 0.75 ? 'cs-sem-high' : c.semantic_score <= 0.50 ? 'cs-sem-low' : 'cs-sem-mid';\n                semHtml = '<span class=\"cs-sem-tag '+semCls+'\">'+(c.semantic_score*100).toFixed(0)+'% '+GeoAPI.escapeHtml(c.semantic_label||'')+'<\/span>';\n            }\n\n            html += '<div class=\"cs-chunk\">'\n                  + '<div class=\"cs-chunk__head\">'\n                  + '<span class=\"cs-chunk__id\">Chunk #'+c.id+'<\/span>'\n                  + '<div class=\"cs-chunk__meta\">'\n                  + '<span>'+c.full_length+' \u5b57<\/span>'\n                  + '<span>'+c.tokens+' tokens<\/span>'\n                  + semHtml\n                  + '<\/div><\/div>'\n                  + '<div class=\"cs-chunk__body\">'+GeoAPI.escapeHtml(c.text)+'<\/div>'\n                  + '<\/div>';\n        });\n\n        GeoAPI.showResult(result, html);\n    }\n});\n<\/script>\n\n        
\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"parent":48884,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-48909","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/pages\/48909","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/comments?post=48909"}],"version-history":[{"count":0,"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/pages\/48909\/revisions"}],"up":[{"embeddable":true,"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/pages\/48884"}],"wp:attachment":[{"href":"https:\/\/www.geobok.com\/en\/wp-json\/wp\/v2\/media?parent=48909"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}