Harvest_Zhihu_Page

Name: Harvest Zhihu Page
Rating: 76
Author: cjhfff

Category: browser-automation
Target: host
Trigger: 爬取知乎 | 抓取回答 | 提取素材

Description

自动将当前页面滚动到底部若干次（默认 6 次，每次间隔 1.5 秒）以触发懒加载，然后提取页面上已加载回答的纯文本并下载为本地 TXT 文件。

Workflow

Step 1: 输出提示

code

正在启动自动滚屏与抓取任务，请稍候...

Step 2: 执行 JavaScript 脚本

javascript

(function() {
  /**
   * Formats a date as YYYY-MM-DD using the browser's local time zone.
   *
   * toISOString() is deliberately avoided: it reports the UTC date, which can
   * be a day off from the local date shown in the exported file's TIME header.
   *
   * @param {Date} date - The date to format.
   * @returns {string} The local calendar date, zero-padded.
   */
  function formatLocalDate(date) {
    const pad = (value) => (value < 10 ? `0${value}` : String(value));
    return `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  }

  // Settings for one scroll-and-harvest run.
  const config = {
    // Number of scroll-to-bottom steps performed before extraction starts.
    maxScrolls: 6,
    // Delay between two scroll steps, in milliseconds.
    interval: 1500,
    // Name of the downloaded file, stamped with the local date.
    outputName: `zhihu_data_${formatLocalDate(new Date())}.txt`
  };

  console.log("ClewdBot: 开始执行自动滚动...");

  // Number of scroll steps completed so far.
  let completedScrolls = 0;

  // One scroll step: jump to the current bottom of the page so that lazily
  // loaded content is requested, then report progress.
  const runScrollStep = () => {
    window.scrollTo(0, document.body.scrollHeight);
    completedScrolls += 1;
    console.log(`ClewdBot: 滚动进度 ${completedScrolls}/${config.maxScrolls}`);

    if (completedScrolls < config.maxScrolls) {
      return;
    }

    // All steps are done: stop the timer and start extraction after a
    // 2-second pause.
    clearInterval(scrollTimer);
    setTimeout(extractAndDownload, 2000);
  };

  const scrollTimer = setInterval(runScrollStep, config.interval);

  /**
   * Collects the text of every `.RichContent-inner` element on the page and
   * downloads it as a UTF-8 text file named `config.outputName`.
   *
   * The file starts with a header (page title, URL, local time) followed by
   * one numbered section per matched element. Whitespace runs inside each
   * section, including line breaks, are collapsed to single spaces and the
   * result is trimmed.
   *
   * If no element matches, an alert is shown and nothing is downloaded.
   *
   * @returns {void}
   */
  function extractAndDownload() {
    console.log("ClewdBot: 开始提取文本...");

    const answers = document.querySelectorAll('.RichContent-inner');
    if (answers.length === 0) {
      alert("ClewdBot: 未检测到有效回答内容，请确认在问题页面运行。");
      return;
    }

    const separator = "--------------------------------------------------";
    const lines = [
      `SOURCE: ${document.title}`,
      `URL: ${window.location.href}`,
      `TIME: ${new Date().toLocaleString()}`,
      separator
    ];

    answers.forEach((answer, index) => {
      const text = answer.innerText.replace(/\s+/g, ' ').trim();
      lines.push(`=== 回答 #${index + 1} ===`, text, separator);
    });

    // Every line, including the last separator, ends with a newline.
    const content = `${lines.join("\n")}\n`;

    const blob = new Blob([content], { type: 'text/plain;charset=utf-8' });
    const objectUrl = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = config.outputName;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      document.body.removeChild(link);
      // Release the blob once the download has had time to start; without
      // this the object URL keeps the blob alive until the page is unloaded.
      // The revoke is deferred because revoking right after click() can
      // cancel the download in some browsers.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
    }

    alert(`任务完成！ 已抓取 ${answers.length} 条回答。 文件已自动下载: ${config.outputName}`);
  }
})();