0x01 原因

造这个轮子的原因是:其他的权重查询工具用着不太友好,总是有些小毛病,比如漏一个、少一个、结果不对等问题,所以自己也写了一个。缺点是慢一点,优点是查得比较稳,很少出现结果是n或者漏几个url的问题。

0x02 代码

  • 主程序代码:
# coding=utf-8
import requests   #Author:斯文
import re
import time
from threading import Thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# HTTP request headers targeting rank.chinaz.com, with a desktop Chrome
# User-Agent and a hard-coded Cookie value.
# NOTE(review): the Cookie looks like it was copied from a real browser
# session and may be stale -- confirm before relying on it.
# NOTE(review): nothing in the visible part of this file references this
# dict (process() drives Selenium instead) -- verify whether it is still needed.
zhanzhang_headers = {
    'Host': 'rank.chinaz.com',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': 'BDTUJIAID=febc82b216a29e116730505bc1e471a9; inputbox_urls=%5b%22passivcashincome.com%22%2c%22feifeizuida.com%22%5d; UM_distinctid=16e63892b4e3b1-031b6053dcfc9f-7711b3e-100200-16e63892b4fa8a; Hm_lvt_aecc9715b0f5d5f7f34fba48a3c511d6=1579746706; CNZZDATA433095=cnzz_eid%3D297046501-1578041490-null%26ntime%3D1583974744; CNZZDATA5082706=cnzz_eid%3D902178444-1578044637-null%26ntime%3D1583975389; qHistory=aHR0cDovL3Rvb2wuY2hpbmF6LmNvbV/nq5nplb/lt6Xlhbd8aHR0cDovL3JhbmsuY2hpbmF6LmNvbV/nmb7luqbmnYPph43mn6Xor6J8aHR0cDovL3Rvb2wuY2hpbmF6LmNvbS90b29scy9lc2NhcGUuYXNweF9Fc2NhcGXliqDlr4Yv6Kej5a+GfGh0dHA6Ly93aG9pcy5jaGluYXouY29tL3JldmVyc2UrV2hvaXPlj43mn6V8aHR0cDovL3dob2lzLmNoaW5hei5jb20vK1dob2lz5p+l6K+i',
    'Connection': 'close'
}

# Matches the Baidu-rank badge image URL on the chinaz result page; the
# captured group is the badge file name, which process() parses as an int.
_BAIDU_RANK_RE = re.compile(
    r'csstools\.chinaz\.com/tools/images/public/baiduapp/(.*?)\.gif')

# Leading URL scheme to drop before building the chinaz query URL.
_SCHEME_RE = re.compile(r'^https?://', re.IGNORECASE)


def _normalize_site(line):
    """Return the bare site from one line of url.txt ('' for a blank line).

    Removes surrounding whitespace (including '\\r\\n'), a leading
    'http://' or 'https://' prefix, and trailing slashes.  A real prefix
    match is used on purpose: str.strip('https://') treats its argument
    as a character set and would also eat leading/trailing 'h', 't',
    'p', 's' characters of the domain itself.
    """
    site = _SCHEME_RE.sub('', line.strip())
    return site.rstrip('/')


def _read_baidu_rank(driver, retry_delay=2.5):
    """Return the rank string found in the current page, or None.

    If the badge is not in the page source yet, wait ``retry_delay``
    seconds and look once more before giving up.
    """
    match = _BAIDU_RANK_RE.search(driver.page_source)
    if match is None:
        time.sleep(retry_delay)
        match = _BAIDU_RANK_RE.search(driver.page_source)
    return match.group(1) if match else None


def process():
    """Query the Baidu rank of every site listed in url.txt.

    Starts a headless Chrome, loads the rank.chinaz.com page for each
    site, prints the rank that was found and appends every site whose
    rank is greater than 0 to seo_1.txt.  A site that fails (page error,
    no rank badge, non-numeric rank) is reported on stdout and skipped so
    that one bad entry does not stop the batch.  The browser is always
    shut down, even if the run is interrupted by an error.
    """
    print('[+] 正在后台打开谷歌浏览器...')
    chrome_option = Options()
    # Do not load images, to speed up page loads.
    chrome_option.add_argument('blink-settings=imagesEnabled=false')
    # Run without a visible window; needed on systems without a display.
    chrome_option.add_argument('--headless')
    # Suppress chromedriver console logging to keep the output readable.
    chrome_option.add_experimental_option('excludeSwitches', ['enable-logging'])

    driver = webdriver.Chrome(options=chrome_option)
    try:
        # NOTE(review): Selenium interprets this value in seconds -- confirm
        # that 5000 s (not 5000 ms) is the intended page-load timeout.
        driver.set_page_load_timeout(5000)

        print('[+] 正在查询中,请稍等 ~')
        num = 0
        with open("url.txt") as f:
            for line in f:
                site = _normalize_site(line)
                if not site:
                    # Blank line: nothing to query.
                    continue
                try:
                    driver.get('http://rank.chinaz.com/all/{domain}'.format(domain=site))
                    baidurank = _read_baidu_rank(driver)
                    if baidurank is None:
                        print("[-] 未获取到百度权重,已跳过   url: " + site)
                        continue

                    num = num + 1
                    print("[+] 正在查询第"+str(num)+"条"+"   百度权重:"+str(baidurank)+"   url: "+site)

                    if int(baidurank) > 0:
                        # Append per hit so results survive an interrupted run.
                        with open('seo_1.txt', 'a', encoding='utf-8') as l:
                            l.write(site+'\n')
                except Exception as e:
                    # Best-effort batch: report the failure and move on
                    # instead of silently dropping the site.
                    print("[-] 查询失败,已跳过   url: " + site + "   原因: " + str(e))
    finally:
        # quit() ends the whole browser session and the chromedriver
        # process; close() would only close the current window.
        driver.quit()

0x03 运行

  • 1.python3 Baidu_Rank.py 即可
  • 2.内置读取同目录的url.txt文件
  • 3.权重大于0的网址存入同目录seo_1.txt文件

github链接:https://github.com/sv3nbeast/Baidu_Rank