- Add src/university_scraper module with scraper, analyzer, and CLI - Add backend FastAPI service with API endpoints and database models - Add frontend React app with university management pages - Add configs for Harvard, Manchester, and UCL universities - Add artifacts with various scraper implementations - Add Docker compose configuration for deployment - Update .gitignore to exclude generated files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
176 lines
5.2 KiB
Python
176 lines
5.2 KiB
Python
"""Scrape result API."""
|
|
|
|
from typing import Optional
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from fastapi.responses import JSONResponse
|
|
from sqlalchemy.orm import Session
|
|
|
|
from ..database import get_db
|
|
from ..models import ScrapeResult
|
|
from ..schemas.result import ResultResponse
|
|
|
|
# Router on which the scrape-result endpoints in this module are registered.
router = APIRouter()
|
|
|
|
|
|
@router.get("/university/{university_id}", response_model=ResultResponse)
def get_university_result(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Return the most recent scrape result stored for a university.

    Args:
        university_id: ID used to filter ``ScrapeResult.university_id``.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        The newest matching ``ScrapeResult`` row (ordered by ``created_at``
        descending); the route declares ``ResultResponse`` as its response
        model.

    Raises:
        HTTPException: 404 when no scrape result exists for the university.
    """
    newest_first = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
    )
    latest = newest_first.first()

    if latest:
        return latest

    raise HTTPException(status_code=404, detail="没有爬取结果")
|
|
|
|
|
|
@router.get("/university/{university_id}/schools")
def get_schools(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Return a simplified list of schools from the latest scrape result.

    Args:
        university_id: ID used to filter ``ScrapeResult.university_id``.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with ``total`` (number of schools) and ``schools``, a list of
        dicts holding each school's ``name``, ``url`` and ``program_count``.

    Raises:
        HTTPException: 404 when no scrape result exists for the university.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()

    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")

    # Stored JSON may contain explicit nulls (or result_data itself may be
    # null); dict.get's default only covers *missing* keys, so fall back with
    # `or` to avoid AttributeError/TypeError turning into a 500 response.
    schools = (result.result_data or {}).get("schools") or []

    # Return only the summary fields, not the nested program/faculty data.
    return {
        "total": len(schools),
        "schools": [
            {
                "name": s.get("name"),
                "url": s.get("url"),
                "program_count": len(s.get("programs") or [])
            }
            for s in schools
        ]
    }
|
|
|
|
|
|
@router.get("/university/{university_id}/programs")
def get_programs(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    search: Optional[str] = Query(None, description="搜索项目名称"),
    db: Session = Depends(get_db)
):
    """Return the programs found in a university's latest scrape result.

    Args:
        university_id: ID used to filter ``ScrapeResult.university_id``.
        school_name: When given, keep only programs of the school whose
            name equals this value exactly.
        search: When given, keep only programs whose name contains this
            text, compared case-insensitively.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with ``total`` (number of matching programs) and ``programs``,
        a list of dicts holding ``name``, ``url``, ``degree_type``, the owning
        ``school`` name and ``faculty_count``.

    Raises:
        HTTPException: 404 when no scrape result exists for the university.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()

    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")

    # Stored JSON may contain explicit nulls; dict.get's default only covers
    # *missing* keys, so every container/name lookup falls back with `or`.
    schools = (result.result_data or {}).get("schools") or []

    # Lower-case the search text once instead of once per program.
    needle = search.lower() if search else None

    programs = []
    for school in schools:
        if school_name and school.get("name") != school_name:
            continue

        for prog in school.get("programs") or []:
            # A program with a null name can never match a search term, but
            # must not crash the request either.
            if needle and needle not in (prog.get("name") or "").lower():
                continue

            programs.append({
                "name": prog.get("name"),
                "url": prog.get("url"),
                "degree_type": prog.get("degree_type"),
                "school": school.get("name"),
                "faculty_count": len(prog.get("faculty") or [])
            })

    return {
        "total": len(programs),
        "programs": programs
    }
|
|
|
|
|
|
@router.get("/university/{university_id}/faculty")
def get_faculty(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    program_name: Optional[str] = Query(None, description="按项目筛选"),
    search: Optional[str] = Query(None, description="搜索导师姓名"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db)
):
    """Return a paginated list of faculty from the latest scrape result.

    Args:
        university_id: ID used to filter ``ScrapeResult.university_id``.
        school_name: When given, keep only faculty under the school whose
            name equals this value exactly.
        program_name: When given, keep only faculty under the program whose
            name equals this value exactly.
        search: When given, keep only faculty whose name contains this text,
            compared case-insensitively.
        skip: Number of matching entries to skip (declared ``>= 0``).
        limit: Maximum number of entries to return (declared 1..200,
            default 50).
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with ``total`` (matches before pagination), the echoed
        ``skip`` and ``limit``, and ``faculty``: the requested page of dicts
        holding ``name``, ``url``, ``title``, ``email``, ``program`` and
        ``school``.

    Raises:
        HTTPException: 404 when no scrape result exists for the university.
    """
    result = db.query(ScrapeResult).filter(
        ScrapeResult.university_id == university_id
    ).order_by(ScrapeResult.created_at.desc()).first()

    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")

    # Stored JSON may contain explicit nulls; dict.get's default only covers
    # *missing* keys, so every container/name lookup falls back with `or`.
    schools = (result.result_data or {}).get("schools") or []

    # Lower-case the search text once instead of once per faculty entry.
    needle = search.lower() if search else None

    faculty_list = []
    for school in schools:
        if school_name and school.get("name") != school_name:
            continue

        for prog in school.get("programs") or []:
            if program_name and prog.get("name") != program_name:
                continue

            for fac in prog.get("faculty") or []:
                # An entry with a null name can never match a search term,
                # but must not crash the request either.
                if needle and needle not in (fac.get("name") or "").lower():
                    continue

                faculty_list.append({
                    "name": fac.get("name"),
                    "url": fac.get("url"),
                    "title": fac.get("title"),
                    "email": fac.get("email"),
                    "program": prog.get("name"),
                    "school": school.get("name")
                })

    # Count before slicing so clients can compute the number of pages.
    total = len(faculty_list)
    faculty_list = faculty_list[skip:skip + limit]

    return {
        "total": total,
        "skip": skip,
        "limit": limit,
        "faculty": faculty_list
    }
|
|
|
|
|
|
@router.get("/university/{university_id}/export")
def export_result(
    university_id: int,
    format: str = Query("json", enum=["json"]),
    db: Session = Depends(get_db)
):
    """Export the latest scrape result of a university as a download.

    Args:
        university_id: ID used to filter ``ScrapeResult.university_id``.
        format: Export format; only ``"json"`` is handled.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``JSONResponse`` carrying the stored ``result_data`` with a
        ``Content-Disposition: attachment`` header naming the file
        ``university_<id>_result.json``.

    Raises:
        HTTPException: 404 when no scrape result exists for the university;
            400 when ``format`` is anything other than ``"json"``.
    """
    latest = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )

    if not latest:
        raise HTTPException(status_code=404, detail="没有爬取结果")

    # Reject unsupported formats up front; the lookup above still runs first,
    # so a missing result is reported as 404 before the format is considered.
    if format != "json":
        raise HTTPException(status_code=400, detail="不支持的格式")

    disposition = f"attachment; filename=university_{university_id}_result.json"
    return JSONResponse(
        content=latest.result_data,
        headers={"Content-Disposition": disposition}
    )
|