"""Scrape result API.

Read-only endpoints exposing the most recent ``ScrapeResult`` of a university:
the raw result, flattened school / program / faculty listings, and a JSON
export.
"""
from typing import Any, Dict, Iterator, List, Optional, Tuple

from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from ..database import get_db
from ..models import ScrapeResult
from ..schemas.result import ResultResponse

router = APIRouter()


def _latest_result_or_404(db: Session, university_id: int) -> ScrapeResult:
    """Return the newest ScrapeResult of a university, or raise HTTP 404.

    "Newest" means the row with the greatest ``created_at`` among those whose
    ``university_id`` matches.
    """
    result = (
        db.query(ScrapeResult)
        .filter(ScrapeResult.university_id == university_id)
        .order_by(ScrapeResult.created_at.desc())
        .first()
    )
    if not result:
        raise HTTPException(status_code=404, detail="没有爬取结果")
    return result


def _schools_of(result: ScrapeResult) -> List[Dict[str, Any]]:
    """Return the ``schools`` list of a result, or ``[]`` when absent/null."""
    # ``or`` (rather than a .get default) also covers keys stored as null.
    data = result.result_data or {}
    return data.get("schools") or []


def _iter_programs(
    schools: List[Dict[str, Any]], school_name: Optional[str]
) -> Iterator[Tuple[Dict[str, Any], Dict[str, Any]]]:
    """Yield ``(school, program)`` pairs, optionally limited to one school.

    An empty or ``None`` ``school_name`` disables the filter; otherwise the
    school's ``name`` must equal it exactly.
    """
    for school in schools:
        if school_name and school.get("name") != school_name:
            continue
        for prog in school.get("programs") or []:
            yield school, prog


def _name_matches(needle: Optional[str], name: Optional[str]) -> bool:
    """Return True when ``needle`` is unset or is a substring of ``name``.

    ``needle`` must already be lower-cased; ``name`` is lower-cased here and a
    null ``name`` is treated as the empty string.
    """
    return needle is None or needle in (name or "").lower()


@router.get("/university/{university_id}", response_model=ResultResponse)
def get_university_result(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Get the latest scrape result of a university.

    Responds with 404 when the university has no scrape result.
    """
    return _latest_result_or_404(db, university_id)


@router.get("/university/{university_id}/schools")
def get_schools(
    university_id: int,
    db: Session = Depends(get_db)
):
    """Get the list of schools from the latest scrape result.

    Responds with 404 when the university has no scrape result.
    """
    schools = _schools_of(_latest_result_or_404(db, university_id))

    # Return a simplified view: name, url and the number of programs only.
    return {
        "total": len(schools),
        "schools": [
            {
                "name": s.get("name"),
                "url": s.get("url"),
                "program_count": len(s.get("programs") or []),
            }
            for s in schools
        ],
    }


@router.get("/university/{university_id}/programs")
def get_programs(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    search: Optional[str] = Query(None, description="搜索项目名称"),
    db: Session = Depends(get_db)
):
    """Get the list of programs from the latest scrape result.

    ``school_name`` keeps only programs of the school with exactly that name;
    ``search`` keeps only programs whose name contains it, case-insensitively.
    Responds with 404 when the university has no scrape result.
    """
    schools = _schools_of(_latest_result_or_404(db, university_id))
    # Lower-case the search term once instead of once per program.
    needle = search.lower() if search else None

    programs = []
    for school, prog in _iter_programs(schools, school_name):
        if not _name_matches(needle, prog.get("name")):
            continue
        programs.append({
            "name": prog.get("name"),
            "url": prog.get("url"),
            "degree_type": prog.get("degree_type"),
            "school": school.get("name"),
            "faculty_count": len(prog.get("faculty") or []),
        })

    return {
        "total": len(programs),
        "programs": programs,
    }


@router.get("/university/{university_id}/faculty")
def get_faculty(
    university_id: int,
    school_name: Optional[str] = Query(None, description="按学院筛选"),
    program_name: Optional[str] = Query(None, description="按项目筛选"),
    search: Optional[str] = Query(None, description="搜索导师姓名"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db)
):
    """Get a paginated list of faculty from the latest scrape result.

    ``school_name`` and ``program_name`` are exact-match filters; ``search``
    is a case-insensitive substring match on the faculty name. ``total`` in
    the response counts all matches before ``skip``/``limit`` are applied.
    Responds with 404 when the university has no scrape result.
    """
    schools = _schools_of(_latest_result_or_404(db, university_id))
    # Lower-case the search term once instead of once per faculty member.
    needle = search.lower() if search else None

    faculty_list = []
    for school, prog in _iter_programs(schools, school_name):
        if program_name and prog.get("name") != program_name:
            continue
        for fac in prog.get("faculty") or []:
            if not _name_matches(needle, fac.get("name")):
                continue
            faculty_list.append({
                "name": fac.get("name"),
                "url": fac.get("url"),
                "title": fac.get("title"),
                "email": fac.get("email"),
                "program": prog.get("name"),
                "school": school.get("name"),
            })

    return {
        "total": len(faculty_list),
        "skip": skip,
        "limit": limit,
        "faculty": faculty_list[skip:skip + limit],
    }


@router.get("/university/{university_id}/export")
def export_result(
    university_id: int,
    # NOTE: ``format`` shadows the builtin, but it is the public query
    # parameter name and therefore has to stay as-is.
    format: str = Query("json", enum=["json"]),
    db: Session = Depends(get_db)
):
    """Export the latest scrape result as a downloadable JSON attachment.

    Responds with 404 when the university has no scrape result and with 400
    when ``format`` is anything other than ``"json"``.
    """
    result = _latest_result_or_404(db, university_id)

    if format != "json":
        raise HTTPException(status_code=400, detail="不支持的格式")

    return JSONResponse(
        content=result.result_data,
        headers={
            "Content-Disposition": f"attachment; filename=university_{university_id}_result.json"
        },
    )