You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
187 lines
4.6 KiB
187 lines
4.6 KiB
#!/usr/bin/env node
|
|
/**
 * Playwright browser toolkit.
 * Convenience helpers for visiting web pages, taking screenshots and
 * extracting page content.
 */
|
|
|
|
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';

import { chromium } from 'playwright';
|
|
|
|
/**
 * Visit a web page in a freshly launched Chromium instance and collect its
 * title, HTML, text and (optionally) a screenshot.
 *
 * The browser is always closed before the returned promise settles. Any
 * failure that happens after the browser has launched (context/page creation,
 * navigation, evaluation, screenshot, file write) is logged and reported
 * through the result object rather than by rejecting.
 *
 * @param {string} url - URL to visit.
 * @param {Object} [options] - Configuration options.
 * @param {boolean} [options.headless=true] - Passed to `chromium.launch`.
 * @param {string} [options.waitUntil='domcontentloaded'] - Passed to `page.goto`.
 * @param {number} [options.timeout=30000] - Navigation timeout passed to `page.goto`.
 * @param {boolean} [options.screenshot=true] - Whether to capture a screenshot.
 * @param {?string} [options.screenshotPath=null] - When set, the captured
 *   screenshot is also written to this file; missing parent directories are
 *   created first.
 * @param {boolean} [options.fullPage=false] - Passed to `page.screenshot`.
 * @returns {Promise<Object>} On success
 *   `{ success: true, url, title, content, text, screenshot }`, where
 *   `screenshot` is the value returned by `page.screenshot` or `null` when
 *   capturing is disabled. On failure `{ success: false, url, error }`, where
 *   `error` is the failure message.
 */
export async function visitPage(url, options = {}) {
  const {
    headless = true,
    waitUntil = 'domcontentloaded',
    timeout = 30000,
    screenshot = true,
    screenshotPath = null,
    fullPage = false
  } = options;

  const browser = await chromium.launch({ headless });

  try {
    // Context and page are created inside the try block so that a failure
    // here still reaches the `finally` below and the browser is not leaked.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    });
    const page = await context.newPage();

    console.log(`🌐 正在访问: ${url}`);
    await page.goto(url, { waitUntil, timeout });

    const title = await page.title();
    console.log(`📄 页面标题: ${title}`);

    // Full serialized HTML of the page.
    const content = await page.content();

    // Text of <body>; a document without a body yields an empty string
    // instead of throwing inside the page.
    const text = await page.evaluate(() => document.body?.innerText ?? '');

    let screenshotBuffer = null;
    if (screenshot) {
      screenshotBuffer = await page.screenshot({ fullPage });
      if (screenshotPath) {
        // Make sure the target directory exists before writing the file.
        await fs.promises.mkdir(path.dirname(screenshotPath), { recursive: true });
        await fs.promises.writeFile(screenshotPath, screenshotBuffer);
        console.log(`📸 截图已保存: ${screenshotPath}`);
      }
    }

    return {
      success: true,
      url,
      title,
      content,
      text,
      screenshot: screenshotBuffer
    };
  } catch (error) {
    // Non-Error values can be thrown too; fall back to their string form.
    const message = error instanceof Error ? error.message : String(error);
    console.error(`❌ 访问失败: ${message}`);
    return {
      success: false,
      url,
      error: message
    };
  } finally {
    await browser.close();
  }
}
|
|
|
|
/**
 * Take a screenshot of a web page.
 *
 * Delegates to `visitPage` with screenshot capture forced on and
 * `screenshotPath` set to `outputPath`; these two settings override any
 * same-named keys in `options`.
 *
 * @param {string} url - Page URL.
 * @param {string} outputPath - Path handed to `visitPage` as `screenshotPath`.
 * @param {Object} [options] - Extra options forwarded to `visitPage`.
 * @returns {Promise<Object>} The result object resolved by `visitPage`.
 */
export async function screenshot(url, outputPath, options = {}) {
  const visitOptions = { ...options, screenshot: true, screenshotPath: outputPath };
  return visitPage(url, visitOptions);
}
|
|
|
|
/**
 * Fetch a web page's content without capturing a screenshot.
 *
 * Delegates to `visitPage` with `screenshot` forced to `false`, overriding
 * any `screenshot` key in `options`.
 *
 * @param {string} url - Page URL.
 * @param {Object} [options] - Extra options forwarded to `visitPage`.
 * @returns {Promise<Object>} The result object resolved by `visitPage`.
 */
export async function extractText(url, options = {}) {
  const visitOptions = { ...options, screenshot: false };
  return visitPage(url, visitOptions);
}
|
|
|
|
/**
 * Open a page and run a caller-supplied action against it.
 *
 * The browser is always closed before the returned promise settles. Any
 * failure after the browser has launched (context/page creation, navigation,
 * or the action itself) is logged and reported through the result object
 * rather than by rejecting.
 *
 * @param {string} url - Page URL.
 * @param {Function} action - Called as `action(page)` after navigation; its
 *   (awaited) return value becomes `result`.
 * @param {Object} [options] - Configuration options.
 * @param {boolean} [options.headless=true] - Passed to `chromium.launch`.
 * @param {string} [options.waitUntil='domcontentloaded'] - Passed to `page.goto`.
 * @param {number} [options.timeout=30000] - Navigation timeout passed to `page.goto`.
 * @returns {Promise<Object>} `{ success: true, url, result }` on success, or
 *   `{ success: false, url, error }` with the failure message otherwise.
 */
export async function customAction(url, action, options = {}) {
  const {
    headless = true,
    waitUntil = 'domcontentloaded',
    timeout = 30000
  } = options;

  const browser = await chromium.launch({ headless });

  try {
    // Context and page are created inside the try block so that a failure
    // here still reaches the `finally` below and the browser is not leaked.
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    });
    const page = await context.newPage();

    console.log(`🌐 正在访问: ${url}`);
    await page.goto(url, { waitUntil, timeout });

    // Hand the loaded page to the caller's callback.
    const result = await action(page);

    return {
      success: true,
      url,
      result
    };
  } catch (error) {
    // The callback may throw non-Error values; fall back to their string form.
    const message = error instanceof Error ? error.message : String(error);
    console.error(`❌ 操作失败: ${message}`);
    return {
      success: false,
      url,
      error: message
    };
  } finally {
    await browser.close();
  }
}
|
|
|
|
// Command-line entry point: runs only when this file is executed directly,
// not when it is imported as a module. `pathToFileURL` converts the script
// path into a properly encoded file URL so the comparison with
// `import.meta.url` also holds for Windows paths and paths containing
// characters that must be percent-encoded.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  // Example run: visit the URL given as the first CLI argument (or the
  // default below) and save a timestamped screenshot under ./screenshots.
  const url = process.argv[2] || 'https://www.baidu.com';
  const outputDir = path.join(process.cwd(), 'screenshots');

  // With `recursive: true` this is a no-op when the directory already exists.
  fs.mkdirSync(outputDir, { recursive: true });

  // ':' and '.' are replaced so the ISO timestamp is safe in a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const screenshotPath = path.join(outputDir, `screenshot-${timestamp}.png`);

  visitPage(url, { screenshotPath })
    .then(result => {
      if (result.success) {
        console.log('\n✅ 访问成功!');
        console.log(`标题: ${result.title}`);
        console.log(`文本长度: ${result.text.length} 字符`);
      } else {
        console.log('\n❌ 访问失败');
        console.log(`错误: ${result.error}`);
      }
      // Exit code mirrors the outcome: 0 on success, 1 on failure.
      process.exit(result.success ? 0 : 1);
    })
    .catch(error => {
      console.error('Error:', error);
      process.exit(1);
    });
}
|
|
|
|
// Default export: one object bundling the same four functions that are also
// exported by name from this module.
const browserTools = { visitPage, screenshot, extractText, customAction };

export default browserTools;
|
|
|