Add university scraper system with backend, frontend, and configs
- Add src/university_scraper module with scraper, analyzer, and CLI
- Add backend FastAPI service with API endpoints and database models
- Add frontend React app with university management pages
- Add configs for Harvard, Manchester, and UCL universities
- Add artifacts with various scraper implementations
- Add Docker compose configuration for deployment
- Update .gitignore to exclude generated files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
backend/app/api/scripts.py
@@ -0,0 +1,167 @@
"""Scraper script API."""

from typing import List
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from sqlalchemy.orm import Session

from ..database import get_db
from ..models import University, ScraperScript
from ..schemas.script import (
    ScriptCreate,
    ScriptResponse,
    GenerateScriptRequest,
    GenerateScriptResponse
)
from ..services.script_generator import generate_scraper_script

router = APIRouter()
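
# NOTE: University and ScraperScript are the SQLAlchemy models from
# app/models, and the request/response schemas imported above live in
# app/schemas/script.py; neither file is shown in this hunk. Inferred
# from how they are used below, the schemas are assumed to look roughly
# like this sketch:
#
#     from typing import Optional
#     from pydantic import BaseModel
#
#     class GenerateScriptRequest(BaseModel):
#         university_url: str
#         university_name: Optional[str] = None
#
#     class GenerateScriptResponse(BaseModel):
#         success: bool
#         university_id: int
#         script_id: Optional[int] = None
#         message: str
#         status: str
#
#     class ScriptCreate(BaseModel):
#         university_id: int
#         script_name: str
#         script_content: str
#         config_content: Optional[str] = None
#
#     # ScriptResponse presumably mirrors the ScraperScript columns
#     # (id, university_id, script_name, version, status, ...).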


@router.post("/generate", response_model=GenerateScriptResponse)
async def generate_script(
    data: GenerateScriptRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """
    One-click scraper script generation.

    Analyzes the university website's structure and automatically
    generates a scraper script for it in a background task.
    """
    # Find the university record, or create it if it does not exist yet
    university = db.query(University).filter(University.url == data.university_url).first()

    if not university:
        # Derive a university name from the URL when none was provided
        name = data.university_name
        if not name:
            from urllib.parse import urlparse
            parsed = urlparse(data.university_url)
            name = parsed.netloc.replace("www.", "").split(".")[0].title()

        university = University(
            name=name,
            url=data.university_url,
            status="analyzing"
        )
        db.add(university)
        db.commit()
        db.refresh(university)
    else:
        # Reset the status for a fresh analysis run
        university.status = "analyzing"
        db.commit()

    # Run the script generation in the background
    background_tasks.add_task(
        generate_scraper_script,
        university_id=university.id,
        university_url=data.university_url
    )

    return GenerateScriptResponse(
        success=True,
        university_id=university.id,
        script_id=None,
        message="Analyzing the site structure and generating the scraper script...",
        status="analyzing"
    )
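
# Example client call for generate_script (a sketch: it assumes this
# router is mounted under an /api/scripts prefix and that the API runs
# on localhost:8000; adjust both to the real deployment):
#
#     import requests
#
#     resp = requests.post(
#         "http://localhost:8000/api/scripts/generate",
#         json={"university_url": "https://www.ucl.ac.uk"},
#     )
#     print(resp.json())
#     # e.g. {"success": true, "university_id": 1, "script_id": null,
#     #       "message": "...", "status": "analyzing"}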


@router.get("/university/{university_id}", response_model=List[ScriptResponse])
def get_university_scripts(
    university_id: int,
    db: Session = Depends(get_db)
):
    """List all scraper scripts for a university, newest version first."""
    scripts = db.query(ScraperScript).filter(
        ScraperScript.university_id == university_id
    ).order_by(ScraperScript.version.desc()).all()

    return scripts


@router.get("/{script_id}", response_model=ScriptResponse)
def get_script(
    script_id: int,
    db: Session = Depends(get_db)
):
    """Get script details."""
    script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first()
    if not script:
        raise HTTPException(status_code=404, detail="Script not found")

    return script


@router.post("", response_model=ScriptResponse)
def create_script(
    data: ScriptCreate,
    db: Session = Depends(get_db)
):
    """Create a script manually."""
    # Check that the university exists
    university = db.query(University).filter(University.id == data.university_id).first()
    if not university:
        raise HTTPException(status_code=404, detail="University not found")

    # Derive the next version number from the current script count
    max_version = db.query(ScraperScript).filter(
        ScraperScript.university_id == data.university_id
    ).count()

    script = ScraperScript(
        university_id=data.university_id,
        script_name=data.script_name,
        script_content=data.script_content,
        config_content=data.config_content,
        version=max_version + 1,
        status="active"
    )

    db.add(script)
    db.commit()
    db.refresh(script)

    # Mark the university as ready now that it has a script
    university.status = "ready"
    db.commit()

    return script
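
# NOTE: create_script numbers versions by counting existing rows, so a
# version number can be reused after a deletion. A max-based variant
# (a sketch, not the current behavior) would avoid that:
#
#     from sqlalchemy import func
#
#     max_version = db.query(
#         func.coalesce(func.max(ScraperScript.version), 0)
#     ).filter(
#         ScraperScript.university_id == data.university_id
#     ).scalar()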


@router.put("/{script_id}", response_model=ScriptResponse)
def update_script(
    script_id: int,
    data: ScriptCreate,
    db: Session = Depends(get_db)
):
    """Update a script's content in place."""
    script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first()
    if not script:
        raise HTTPException(status_code=404, detail="Script not found")

    script.script_content = data.script_content
    if data.config_content:
        script.config_content = data.config_content

    db.commit()
    db.refresh(script)

    return script


@router.delete("/{script_id}")
def delete_script(
    script_id: int,
    db: Session = Depends(get_db)
):
    """Delete a script."""
    script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first()
    if not script:
        raise HTTPException(status_code=404, detail="Script not found")

    db.delete(script)
    db.commit()

    return {"message": "Deleted successfully"}