Files
University-Playwright-Codeg…/backend/app/api/results.py
yangxiaoyu-crypto 426cf4d2cd Add university scraper system with backend, frontend, and configs
- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 15:25:08 +08:00

176 lines
5.2 KiB
Python

"""爬取结果API"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session
from ..database import get_db
from ..models import ScrapeResult
from ..schemas.result import ResultResponse
# Router collecting all scrape-result endpoints; presumably mounted with a
# URL prefix by the application setup — confirm against the app factory.
router = APIRouter()
@router.get("/university/{university_id}", response_model=ResultResponse)
def get_university_result(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Return the most recent scrape result for the given university.

    Raises:
        HTTPException: 404 when no scrape result exists for the university.
    """
    latest = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )
    if latest is None:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    return latest
@router.get("/university/{university_id}/schools")
def get_schools(
    university_id: int,
    db: Session = Depends(get_db)
):
    """List the schools in a university's latest scrape result.

    Args:
        university_id: Primary key of the target university.

    Returns:
        dict with ``total`` (school count) and ``schools`` — simplified
        entries carrying name, url, and number of programs.

    Raises:
        HTTPException: 404 when the university has no scrape result yet.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()
    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    # result_data is a stored JSON payload; guard against a NULL value so a
    # bad row yields an empty listing instead of an AttributeError (HTTP 500).
    schools = (result.result_data or {}).get("schools", [])
    return {
        "total": len(schools),
        "schools": [
            {
                "name": s.get("name"),
                "url": s.get("url"),
                "program_count": len(s.get("programs", []))
            }
            for s in schools
        ]
    }
@router.get("/university/{university_id}/programs")
def get_programs(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    search: Optional[str] = Query(None, description="搜索项目名称"),
    db: Session = Depends(get_db)
):
    """List programs from a university's latest scrape result.

    Args:
        university_id: Primary key of the target university.
        school_name: When given, only programs whose school name matches exactly.
        search: When given, case-insensitive substring match on the program name.

    Returns:
        dict with ``total`` (program count) and ``programs`` — flattened
        summaries carrying name, url, degree type, school, and faculty count.

    Raises:
        HTTPException: 404 when the university has no scrape result yet.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()
    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    # Guard against a NULL JSON payload so a bad row yields an empty
    # listing instead of an AttributeError (HTTP 500).
    schools = (result.result_data or {}).get("schools", [])
    # Hoist the loop-invariant lowercasing out of the nested loops.
    needle = search.lower() if search else None
    programs = []
    for school in schools:
        if school_name and school.get("name") != school_name:
            continue
        for prog in school.get("programs", []):
            if needle and needle not in prog.get("name", "").lower():
                continue
            programs.append({
                "name": prog.get("name"),
                "url": prog.get("url"),
                "degree_type": prog.get("degree_type"),
                "school": school.get("name"),
                "faculty_count": len(prog.get("faculty", []))
            })
    return {
        "total": len(programs),
        "programs": programs
    }
@router.get("/university/{university_id}/faculty")
def get_faculty(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    program_name: Optional[str] = Query(None, description="按项目筛选"),
    search: Optional[str] = Query(None, description="搜索导师姓名"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db)
):
    """List faculty from a university's latest scrape result, paginated.

    Args:
        university_id: Primary key of the target university.
        school_name: When given, only faculty under the school with this exact name.
        program_name: When given, only faculty under the program with this exact name.
        search: When given, case-insensitive substring match on the faculty name.
        skip: Offset into the filtered list (pagination).
        limit: Page size, 1–200.

    Returns:
        dict with ``total`` (pre-pagination count), the ``skip``/``limit``
        echoed back, and ``faculty`` — the requested page of entries.

    Raises:
        HTTPException: 404 when the university has no scrape result yet.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()
    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    # Guard against a NULL JSON payload so a bad row yields an empty
    # listing instead of an AttributeError (HTTP 500).
    schools = (result.result_data or {}).get("schools", [])
    # Hoist the loop-invariant lowercasing out of the nested loops.
    needle = search.lower() if search else None
    faculty_list = []
    for school in schools:
        if school_name and school.get("name") != school_name:
            continue
        for prog in school.get("programs", []):
            if program_name and prog.get("name") != program_name:
                continue
            for fac in prog.get("faculty", []):
                if needle and needle not in fac.get("name", "").lower():
                    continue
                faculty_list.append({
                    "name": fac.get("name"),
                    "url": fac.get("url"),
                    "title": fac.get("title"),
                    "email": fac.get("email"),
                    "program": prog.get("name"),
                    "school": school.get("name")
                })
    # total reflects the filtered set before slicing, so clients can page.
    total = len(faculty_list)
    faculty_list = faculty_list[skip:skip + limit]
    return {
        "total": total,
        "skip": skip,
        "limit": limit,
        "faculty": faculty_list
    }
@router.get("/university/{university_id}/export")
def export_result(
    university_id: int,
    format: str = Query("json", enum=["json"]),
    db: Session = Depends(get_db)
):
    """Download a university's latest scrape result as a JSON attachment."""
    latest = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )
    if latest is None:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    if format != "json":
        # Unreachable while the Query enum only permits "json"; kept as a
        # guard in case more export formats are added later.
        raise HTTPException(status_code=400, detail="不支持的格式")
    disposition = f"attachment; filename=university_{university_id}_result.json"
    return JSONResponse(
        content=latest.result_data,
        headers={"Content-Disposition": disposition}
    )