#!/usr/bin/env node
import { chromium } from 'playwright';
import fs from 'fs';
import os from 'os';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Maximum summary size, in characters, for each supported `length` option.
// A Map (rather than a plain object) keeps inherited keys such as
// "constructor" from being mistaken for a valid length.
const SUMMARY_LENGTHS = new Map([
  ['short', 300],
  ['medium', 800],
  ['long', 2000],
  ['xl', 5000],
]);

/**
 * Extracts title, text, links and images from the current document.
 *
 * This function is passed to `page.evaluate`, so it executes inside the
 * browser page: it must stay self-contained and must not reference anything
 * from the Node.js module scope.
 *
 * @returns {{title: string, text: string,
 *   links: Array<{text: string, href: string}>,
 *   images: Array<{alt: string, src: string}>}}
 */
function extractPageContent() {
  // Remove elements that would pollute the extracted text.
  const selectors = 'script, style, nav, footer, aside, .ad, .advertisement, .popup, .cookie-banner';
  document.querySelectorAll(selectors).forEach((el) => el.remove());

  const title = document.title;
  // Collapsing every whitespace run to one space also removes all newlines,
  // so no separate newline normalisation is needed afterwards.
  const text = (document.body?.innerText ?? '').replace(/\s+/g, ' ').trim();

  // First 10 absolute links that have visible text. `a[href]` also matches
  // SVG <a> elements, whose `href` is an object rather than a string, so the
  // type is checked before calling `startsWith`.
  const links = Array.from(document.querySelectorAll('a[href]'))
    .map((a) => ({ text: a.textContent.trim(), href: a.href }))
    .filter((link) => link.text && typeof link.href === 'string' && link.href.startsWith('http'))
    .slice(0, 10);

  // First 5 images with an absolute URL.
  const images = Array.from(document.querySelectorAll('img[src]'))
    .map((img) => ({ alt: img.alt.trim(), src: img.src }))
    .filter((img) => img.src.startsWith('http'))
    .slice(0, 5);

  return { title, text, links, images };
}

/**
 * Prints a human-readable report for a summary result to stdout.
 *
 * NOTE(review): the emoji prefixes in the messages below (and in the banner
 * printed by `summarize`) look mis-encoded in the source file. They are kept
 * exactly as found; restore the intended characters once confirmed.
 *
 * @param {object} result - The object built by `summarize`.
 */
function printTextReport(result) {
  console.log(`\nšŸ“„ Title: ${result.title}`);
  console.log(`\nšŸ“ Summary:`);
  console.log(result.summary);
  console.log(`\nšŸ“Š Stats:`);
  console.log(` - Total characters: ${result.stats.totalCharacters}`);
  console.log(` - Summary length: ${result.stats.summaryLength}`);
  console.log(` - Links found: ${result.stats.linksFound}`);
  console.log(` - Images found: ${result.stats.imagesFound}`);

  if (result.topLinks.length > 0) {
    console.log(`\nšŸ”— Top links:`);
    result.topLinks.forEach((link, i) => {
      console.log(` ${i + 1}. ${link.text} - ${link.href}`);
    });
  }
}

/**
 * Best-effort removal of `playwright_*` entries from the system temp directory.
 *
 * Failures are reported as warnings instead of thrown, so a cleanup problem
 * never discards a summary that was already produced.
 *
 * NOTE(review): as in the original code, this removes every matching entry,
 * which may include temp data of other Playwright processes running at the
 * same time. Confirm whether that is acceptable for this tool.
 */
function removePlaywrightTempEntries() {
  const tmpDir = os.tmpdir();

  let entries;
  try {
    entries = fs.readdirSync(tmpDir);
  } catch (err) {
    console.warn(`Skipping temp cleanup, cannot read ${tmpDir}: ${err.message}`);
    return;
  }

  for (const name of entries) {
    if (!name.startsWith('playwright_')) {
      continue;
    }
    try {
      // `recursive` + `force` handles files, directories and entries that
      // disappeared since the directory was listed.
      fs.rmSync(path.join(tmpDir, name), { recursive: true, force: true });
    } catch (err) {
      console.warn(`Could not remove temp entry ${name}: ${err.message}`);
    }
  }
}

/**
 * Loads a web page in headless Chromium and produces a truncated-text summary.
 *
 * The "summary" is the first N characters of the page's visible text, where N
 * depends on `options.length`; an ellipsis is appended when text was cut off.
 * The result is printed to stdout (as text or JSON) and also returned.
 *
 * @param {string} url - Address of the page to summarize.
 * @param {object} [options]
 * @param {'short'|'medium'|'long'|'xl'} [options.length='medium'] - Summary
 *   size; unknown values fall back to 'medium'.
 * @param {'text'|'json'} [options.output='text'] - Print format; any value
 *   other than 'json' prints the text report.
 * @returns {Promise<{url: string, title: string, summary: string,
 *   stats: {totalCharacters: number, summaryLength: number,
 *           linksFound: number, imagesFound: number},
 *   topLinks: Array<{text: string, href: string}>,
 *   topImages: Array<{alt: string, src: string}>}>}
 * @throws Rejects when the browser cannot be launched or the page cannot be
 *   loaded within 30 seconds.
 */
async function summarize(url, options = {}) {
  const { length = 'medium', output = 'text' } = options;

  console.log(`✨ Summarizing: ${url}`);
  console.log(`šŸ“ Length: ${length}`);

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let content;
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    content = await page.evaluate(extractPageContent);
  } finally {
    // Always shut the browser down, even when navigation or extraction fails;
    // otherwise the Chromium process would be left running.
    await browser.close();
  }

  // Pick the summary size for the requested length.
  const maxLength = SUMMARY_LENGTHS.get(length) ?? SUMMARY_LENGTHS.get('medium');
  const isTruncated = content.text.length > maxLength;
  const summary = isTruncated ? `${content.text.slice(0, maxLength)}...` : content.text;

  const result = {
    url,
    title: content.title,
    summary,
    stats: {
      totalCharacters: content.text.length,
      summaryLength: summary.length,
      linksFound: content.links.length,
      imagesFound: content.images.length,
    },
    topLinks: content.links,
    topImages: content.images,
  };

  if (output === 'json') {
    console.log(JSON.stringify(result, null, 2));
  } else {
    printTextReport(result);
  }

  // Clean up temporary files.
  removePlaywrightTempEntries();

  return result;
}

/**
 * Reports whether this file is the script Node.js was started with.
 *
 * Both paths are resolved through symlinks first, so the check also works
 * when the script is launched via a symlink.
 *
 * @returns {boolean}
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) {
    return false;
  }
  try {
    return fs.realpathSync(entry) === fs.realpathSync(__filename);
  } catch {
    // The entry path could not be resolved; fall back to a literal comparison.
    return entry === __filename;
  }
}

// When this file is run directly, execute a demo:
//   node <this file> [url] [length] [output]
if (isMainModule()) {
  const url = process.argv[2] || 'https://www.baidu.com';
  const length = process.argv[3] || 'medium';
  const output = process.argv[4] || 'text';

  summarize(url, { length, output }).catch((err) => {
    console.error(`Failed to summarize ${url}: ${err?.message ?? err}`);
    process.exitCode = 1;
  });
}

export default summarize;