精简 Golang Selenium:启动 Chrome,保存 Cookie 到本地,爬取链接
// This program starts a chromedriver service, opens a page in a local Chrome
// browser, saves the session cookies to a JSON file, and logs every link
// (<a href=...>) found in the page source.
package main

import (
	"encoding/json"
	"io/ioutil"
	"log"
	"strings"
	"time"

	"github.com/antchfx/htmlquery"
	"github.com/tebeka/selenium"
)

// main runs a single capture pass:
//
//  1. start chromedriver and connect a remote WebDriver session to it,
//  2. open the start page and switch to the last window handle,
//  3. log the current URL and cookies, and write the cookies as JSON to disk,
//  4. parse the page source and log the text and href of every link.
//
// Any failure is logged (with file:line, see log.Lshortfile) and ends the run;
// deferred cleanup still executes because main returns instead of exiting.
func main() {
	const (
		driverPath = "chromedriver.exe"
		driverPort = 4444
		remoteURL  = "http://127.0.0.1:4444/wd/hub"
		startURL   = "https://www.baidu.com/"
		cookieFile = "cks.dat"
		linkXPath  = "//a[@href]"
	)

	// Include date/time and the source location in every log line.
	log.SetFlags(log.LstdFlags | log.Lshortfile)

	// Start one chromedriver service listening on driverPort.
	service, err := selenium.NewChromeDriverService(driverPath, driverPort)
	if err != nil {
		log.Println(err)
		return
	}
	defer func() {
		if stopErr := service.Stop(); stopErr != nil {
			log.Println(stopErr)
		}
	}()

	// Open a WebDriver session against the service started above.
	caps := selenium.Capabilities{}
	wd, err := selenium.NewRemote(caps, remoteURL)
	if err != nil {
		log.Println(err)
		return
	}
	defer func() {
		if quitErr := wd.Quit(); quitErr != nil {
			log.Println(quitErr)
		}
	}()

	// Load the start page.
	if err = wd.Get(startURL); err != nil {
		log.Println(err)
		return
	}

	// Fetch all window handles and switch to the last one returned.
	handles, err := wd.WindowHandles()
	if err != nil {
		log.Println(err)
		return
	}
	// Guard against an empty list: indexing len-1 would panic otherwise.
	if len(handles) == 0 {
		log.Println("no window handles available")
		return
	}
	if err = wd.SwitchWindow(handles[len(handles)-1]); err != nil {
		log.Println(err)
		return
	}

	// Log the URL of the window we switched to.
	currentURL, err := wd.CurrentURL()
	if err != nil {
		log.Println(err)
		return
	}
	log.Println(currentURL)

	// Read the cookies and log each one for inspection.
	cookies, err := wd.GetCookies()
	if err != nil {
		log.Println(err)
		return
	}
	for i, cookie := range cookies {
		log.Println(i, cookie)
	}

	// Encode the cookies as indented JSON.
	cookieJSON, err := json.MarshalIndent(cookies, "", " ")
	if err != nil {
		log.Println(err)
		return
	}
	// Convert to string so the log shows JSON text rather than byte values.
	log.Println(string(cookieJSON))

	// Persist the cookies. They are credentials, so the file is created
	// readable and writable by the owner only.
	if err = ioutil.WriteFile(cookieFile, cookieJSON, 0600); err != nil {
		log.Println(err)
		return
	}

	// Parse the page source and log every anchor that has an href attribute.
	pageSource, err := wd.PageSource()
	if err != nil {
		log.Println(err)
		return
	}
	doc, err := htmlquery.Parse(strings.NewReader(pageSource))
	if err != nil {
		log.Println(err)
		return
	}
	for i, link := range htmlquery.Find(doc, linkXPath) {
		log.Println(i, htmlquery.InnerText(link), htmlquery.SelectAttr(link, "href"))
	}

	// Pause for one second before main returns and the deferred Quit/Stop run.
	time.Sleep(time.Second)
}