"""爬虫脚本API""" from typing import List from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks from sqlalchemy.orm import Session from ..database import get_db from ..models import University, ScraperScript from ..schemas.script import ( ScriptCreate, ScriptResponse, GenerateScriptRequest, GenerateScriptResponse ) from ..services.script_generator import generate_scraper_script router = APIRouter() @router.post("/generate", response_model=GenerateScriptResponse) async def generate_script( data: GenerateScriptRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db) ): """ 一键生成爬虫脚本 分析大学网站结构,自动生成爬虫脚本 """ # 检查或创建大学记录 university = db.query(University).filter(University.url == data.university_url).first() if not university: # 从URL提取大学名称 name = data.university_name if not name: from urllib.parse import urlparse parsed = urlparse(data.university_url) name = parsed.netloc.replace("www.", "").split(".")[0].title() university = University( name=name, url=data.university_url, status="analyzing" ) db.add(university) db.commit() db.refresh(university) else: # 更新状态 university.status = "analyzing" db.commit() # 在后台执行脚本生成 background_tasks.add_task( generate_scraper_script, university_id=university.id, university_url=data.university_url ) return GenerateScriptResponse( success=True, university_id=university.id, script_id=None, message="正在分析网站结构并生成爬虫脚本...", status="analyzing" ) @router.get("/university/{university_id}", response_model=List[ScriptResponse]) def get_university_scripts( university_id: int, db: Session = Depends(get_db) ): """获取大学的所有爬虫脚本""" scripts = db.query(ScraperScript).filter( ScraperScript.university_id == university_id ).order_by(ScraperScript.version.desc()).all() return scripts @router.get("/{script_id}", response_model=ScriptResponse) def get_script( script_id: int, db: Session = Depends(get_db) ): """获取脚本详情""" script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first() if not script: raise HTTPException(status_code=404, detail="脚本不存在") return script @router.post("", response_model=ScriptResponse) def create_script( data: ScriptCreate, db: Session = Depends(get_db) ): """手动创建脚本""" # 检查大学是否存在 university = db.query(University).filter(University.id == data.university_id).first() if not university: raise HTTPException(status_code=404, detail="大学不存在") # 获取当前最高版本 max_version = db.query(ScraperScript).filter( ScraperScript.university_id == data.university_id ).count() script = ScraperScript( university_id=data.university_id, script_name=data.script_name, script_content=data.script_content, config_content=data.config_content, version=max_version + 1, status="active" ) db.add(script) db.commit() db.refresh(script) # 更新大学状态 university.status = "ready" db.commit() return script @router.put("/{script_id}", response_model=ScriptResponse) def update_script( script_id: int, data: ScriptCreate, db: Session = Depends(get_db) ): """更新脚本""" script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first() if not script: raise HTTPException(status_code=404, detail="脚本不存在") script.script_content = data.script_content if data.config_content: script.config_content = data.config_content db.commit() db.refresh(script) return script @router.delete("/{script_id}") def delete_script( script_id: int, db: Session = Depends(get_db) ): """删除脚本""" script = db.query(ScraperScript).filter(ScraperScript.id == script_id).first() if not script: raise HTTPException(status_code=404, detail="脚本不存在") db.delete(script) db.commit() return {"message": "删除成功"}