Add university scraper system with backend, frontend, and configs
- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
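For context when reading the endpoints below: they all walk a nested `result_data` JSON document stored on `ScrapeResult`. A minimal sketch of the shape they assume, with field names taken from the `.get()` calls in the code and every value invented for illustration:

# Illustrative only: the nested structure results.py expects in
# ScrapeResult.result_data. Field names mirror the .get() calls in the
# endpoints below; all values here are made up.
example_result_data = {
    "schools": [
        {
            "name": "School of Engineering",
            "url": "https://example.edu/engineering",
            "programs": [
                {
                    "name": "MSc Computer Science",
                    "url": "https://example.edu/msc-cs",
                    "degree_type": "MSc",
                    "faculty": [
                        {
                            "name": "Jane Doe",
                            "url": "https://example.edu/people/jane-doe",
                            "title": "Professor",
                            "email": "jane.doe@example.edu",
                        }
                    ],
                }
            ],
        }
    ]
}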
backend/app/api/results.py (new file, 175 lines)
@@ -0,0 +1,175 @@
"""Scrape results API"""

from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from ..database import get_db
from ..models import ScrapeResult
from ..schemas.result import ResultResponse

router = APIRouter()


@router.get("/university/{university_id}", response_model=ResultResponse)
|
||||
def get_university_result(
|
||||
university_id: int,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取大学最新的爬取结果"""
|
||||
result = db.query(ScrapeResult).filter(
|
||||
ScrapeResult.university_id == university_id
|
||||
).order_by(ScrapeResult.created_at.desc()).first()
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="没有爬取结果")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@router.get("/university/{university_id}/schools")
|
||||
def get_schools(
|
||||
university_id: int,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取学院列表"""
|
||||
result = db.query(ScrapeResult).filter(
|
||||
ScrapeResult.university_id == university_id
|
||||
).order_by(ScrapeResult.created_at.desc()).first()
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="没有爬取结果")
|
||||
|
||||
schools = result.result_data.get("schools", [])
|
||||
|
||||
# 返回简化的学院列表
|
||||
return {
|
||||
"total": len(schools),
|
||||
"schools": [
|
||||
{
|
||||
"name": s.get("name"),
|
||||
"url": s.get("url"),
|
||||
"program_count": len(s.get("programs", []))
|
||||
}
|
||||
for s in schools
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@router.get("/university/{university_id}/programs")
|
||||
def get_programs(
|
||||
university_id: int,
|
||||
school_name: Optional[str] = Query(None, description="按学院筛选"),
|
||||
search: Optional[str] = Query(None, description="搜索项目名称"),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取项目列表"""
|
||||
result = db.query(ScrapeResult).filter(
|
||||
ScrapeResult.university_id == university_id
|
||||
).order_by(ScrapeResult.created_at.desc()).first()
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="没有爬取结果")
|
||||
|
||||
schools = result.result_data.get("schools", [])
|
||||
programs = []
|
||||
|
||||
for school in schools:
|
||||
if school_name and school.get("name") != school_name:
|
||||
continue
|
||||
|
||||
for prog in school.get("programs", []):
|
||||
if search and search.lower() not in prog.get("name", "").lower():
|
||||
continue
|
||||
|
||||
programs.append({
|
||||
"name": prog.get("name"),
|
||||
"url": prog.get("url"),
|
||||
"degree_type": prog.get("degree_type"),
|
||||
"school": school.get("name"),
|
||||
"faculty_count": len(prog.get("faculty", []))
|
||||
})
|
||||
|
||||
return {
|
||||
"total": len(programs),
|
||||
"programs": programs
|
||||
}
|
||||
|
||||
|
||||
@router.get("/university/{university_id}/faculty")
|
||||
def get_faculty(
|
||||
university_id: int,
|
||||
school_name: Optional[str] = Query(None, description="按学院筛选"),
|
||||
program_name: Optional[str] = Query(None, description="按项目筛选"),
|
||||
search: Optional[str] = Query(None, description="搜索导师姓名"),
|
||||
skip: int = Query(0, ge=0),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取导师列表"""
|
||||
result = db.query(ScrapeResult).filter(
|
||||
ScrapeResult.university_id == university_id
|
||||
).order_by(ScrapeResult.created_at.desc()).first()
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="没有爬取结果")
|
||||
|
||||
schools = result.result_data.get("schools", [])
|
||||
faculty_list = []
|
||||
|
||||
for school in schools:
|
||||
if school_name and school.get("name") != school_name:
|
||||
continue
|
||||
|
||||
for prog in school.get("programs", []):
|
||||
if program_name and prog.get("name") != program_name:
|
||||
continue
|
||||
|
||||
for fac in prog.get("faculty", []):
|
||||
if search and search.lower() not in fac.get("name", "").lower():
|
||||
continue
|
||||
|
||||
faculty_list.append({
|
||||
"name": fac.get("name"),
|
||||
"url": fac.get("url"),
|
||||
"title": fac.get("title"),
|
||||
"email": fac.get("email"),
|
||||
"program": prog.get("name"),
|
||||
"school": school.get("name")
|
||||
})
|
||||
|
||||
total = len(faculty_list)
|
||||
faculty_list = faculty_list[skip:skip + limit]
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
"skip": skip,
|
||||
"limit": limit,
|
||||
"faculty": faculty_list
|
||||
}
|
||||
|
||||
|
||||
@router.get("/university/{university_id}/export")
|
||||
def export_result(
|
||||
university_id: int,
|
||||
format: str = Query("json", enum=["json"]),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""导出爬取结果"""
|
||||
result = db.query(ScrapeResult).filter(
|
||||
ScrapeResult.university_id == university_id
|
||||
).order_by(ScrapeResult.created_at.desc()).first()
|
||||
|
||||
if not result:
|
||||
raise HTTPException(status_code=404, detail="没有爬取结果")
|
||||
|
||||
if format == "json":
|
||||
return JSONResponse(
|
||||
content=result.result_data,
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename=university_{university_id}_result.json"
|
||||
}
|
||||
)
|
||||
|
||||
raise HTTPException(status_code=400, detail="不支持的格式")
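The `ScrapeResult` model and `ResultResponse` schema are imported from elsewhere in this commit and are not shown in this file. A minimal sketch consistent with how this module uses the model; only the three attributes queried above are grounded in the code, and everything else (table name, primary key, defaults) is assumed:

# Hypothetical sketch of the model this file queries; the real definition
# lives in backend/app/models. Only the attributes used above are grounded.
from sqlalchemy import JSON, Column, DateTime, Integer, func
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class ScrapeResult(Base):
    __tablename__ = "scrape_results"  # assumed table name

    id = Column(Integer, primary_key=True)                     # assumed
    university_id = Column(Integer, index=True)                # filtered on above
    result_data = Column(JSON)                                 # nested schools/programs/faculty
    created_at = Column(DateTime, server_default=func.now())   # ordered on above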
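A usage sketch, assuming the service runs locally on port 8000 and `router` is mounted under an `/api/results` prefix (the prefix is set wherever the router is included, not in this file):

# Assumes localhost:8000 and an /api/results mount prefix -- both are guesses;
# check where backend/app/main.py includes this router for the real paths.
import httpx

BASE = "http://localhost:8000/api/results"

latest = httpx.get(f"{BASE}/university/1").json()
schools = httpx.get(f"{BASE}/university/1/schools").json()
programs = httpx.get(f"{BASE}/university/1/programs", params={"search": "physics"}).json()
faculty = httpx.get(f"{BASE}/university/1/faculty", params={"skip": 0, "limit": 50}).json()
print(schools["total"], programs["total"], faculty["total"])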