#!/usr/bin/env python3
"""Debug helper for locating the Computer Science faculty pages.

Opens the Harvard GSAS and SEAS Computer Science pages in a visible
Chromium window, saves a full-page screenshot of each, and prints the
links that look related to faculty or people listings.
"""

import asyncio

from playwright.async_api import async_playwright


async def debug_cs():
    """Inspect the GSAS and SEAS Computer Science pages for faculty links.

    Steps performed:
      1. Load the GSAS Computer Science program page, save a screenshot to
         ``cs_gsas_page.png`` and print every link whose visible text is
         between 3 and 99 characters long.
      2. Print the subset of those links that mention faculty, people, or
         a website.
      3. Load the SEAS Computer Science page, save a screenshot to
         ``seas_cs_page.png`` and print the links whose text or URL
         mentions faculty or people.

    Returns:
        None. All results are printed to stdout or written as PNG files in
        the current working directory.

    Any error raised by Playwright (for example a navigation timeout)
    propagates to the caller; the browser is closed first.
    """
    async with async_playwright() as p:
        # headless=False so the pages can be watched while debugging.
        browser = await p.chromium.launch(headless=False)
        try:
            page = await browser.new_page()

            # Visit the Computer Science GSAS page.
            gsas_url = "https://gsas.harvard.edu/program/computer-science"
            print(f"访问: {gsas_url}")
            await page.goto(gsas_url, wait_until="domcontentloaded", timeout=30000)
            # Fixed pause after DOMContentLoaded -- presumably to give
            # late-loading content time to render before the screenshot.
            await page.wait_for_timeout(3000)

            await page.screenshot(path="cs_gsas_page.png", full_page=True)
            print("截图已保存: cs_gsas_page.png")

            # Collect every anchor with a reasonably sized visible text.
            links = await page.evaluate('''() => {
                const links = [];
                document.querySelectorAll('a[href]').forEach(a => {
                    const text = a.innerText.trim();
                    const href = a.href;
                    if (text && text.length > 2 && text.length < 100) {
                        links.push({text: text, href: href});
                    }
                });
                return links;
            }''')

            print(f"\n页面上的所有链接 ({len(links)} 个):")
            for link in links:
                print(f" - {link['text'][:60]} -> {link['href']}")

            # Look for links that may lead to a Faculty or People page.
            print("\n\n查找Faculty/People相关链接:")
            for link in links:
                text_lower = link['text'].lower()
                href_lower = link['href'].lower()
                if (
                    'faculty' in text_lower
                    or 'people' in href_lower
                    or 'faculty' in href_lower
                    or 'website' in text_lower
                ):
                    print(f" * {link['text']} -> {link['href']}")

            # Try the SEAS (School of Engineering) Computer Science page.
            print("\n\n尝试访问SEAS Computer Science页面...")
            seas_url = "https://seas.harvard.edu/computer-science"
            await page.goto(seas_url, wait_until="domcontentloaded", timeout=30000)
            await page.wait_for_timeout(2000)

            await page.screenshot(path="seas_cs_page.png", full_page=True)
            print("截图已保存: seas_cs_page.png")

            # Here the faculty/people filtering is done in the browser.
            seas_links = await page.evaluate('''() => {
                const links = [];
                document.querySelectorAll('a[href]').forEach(a => {
                    const text = a.innerText.trim();
                    const href = a.href;
                    const lowerText = text.toLowerCase();
                    const lowerHref = href.toLowerCase();
                    if ((lowerText.includes('faculty') || lowerText.includes('people') ||
                         lowerHref.includes('faculty') || lowerHref.includes('people')) &&
                        text.length > 2) {
                        links.push({text: text, href: href});
                    }
                });
                return links;
            }''')

            print("\nSEAS页面上的Faculty/People链接:")
            for link in seas_links:
                print(f" * {link['text']} -> {link['href']}")
        finally:
            # Close the browser even when navigation or evaluation fails.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(debug_cs())