# -*- coding:utf-8 -*-
"""Batch website TDK (Title / Description / Keywords) scanner.

Reads URLs from url.txt (one per line; bare ip:port entries get an
http:// prefix), fetches every page concurrently, and appends the final
URL, status code, title, description, keywords and resolved IP to a
timestamped .xls report.  Sends an e-mail notification when the run is
finished.
"""
import re
import os
import requests
import yagmail
import urllib3
import logging
from concurrent.futures import ThreadPoolExecutor
import time
import threading
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import xlwt
import xlrd
import socket
from xlutils.copy import copy

# Browser-like User-Agent so naive bot filters do not reject the requests.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}


def get_ip(url):
    """Resolve the host part of *url* and return its first address string."""
    # BUG FIX: the original only stripped 'http://', so 'https://' URLs
    # (and URLs carrying a path) could never be resolved.
    host = url.strip('\n').replace('https://', '').replace('http://', '')
    host = host.split('/')[0]
    myaddr = socket.getaddrinfo(host, 'http')
    return myaddr[0][4][0]


def _first_match(pattern, text):
    """Return the first (stripped) regex match in *text*, or ' ' if none."""
    found = re.findall(pattern, text, re.IGNORECASE)
    return found[0].strip() if found else " "


def get_codetitle(url):
    """Fetch *url* and extract its metadata.

    Returns a 6-tuple (final_url, status_code, title, description,
    keywords, ip).  Unreachable pages and missing tags yield the
    placeholder defaults instead of raising.
    """
    code = "无法访问"
    # BUG FIX: description/keywords were only bound inside the try block in
    # the original, so any fetch failure raised UnboundLocalError at return.
    title = description = keywords = " "
    resurl = " "
    try:
        urllib3.disable_warnings()
        requests.adapters.DEFAULT_RETRIES = 5
        res = requests.get(url, headers=header, verify=False,
                           allow_redirects=True, timeout=(3, 12))
        res.encoding = res.apparent_encoding
        code = res.status_code
        resurl = res.url
        # Each field is extracted independently: a page without a <title>
        # still contributes its description/keywords (the original aborted
        # all remaining fields on the first missing tag via [0]).
        title = _first_match(r"(?<=\<title\>)(?:.|\n)+?(?=\<)", res.text)
        description = _first_match(
            r"(?<=\<meta name=\"description\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
            res.text)
        keywords = _first_match(
            r"(?<=\<meta name=\"keywords\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
            res.text)
    except Exception:
        print('%s网址无效或者IP被封锁' % (url))
    try:
        ip = get_ip(url)
    except Exception:
        ip = 'null'
    return resurl, code, title, description, keywords, ip


def write(url):
    """Fetch metadata for *url* and append one row to the shared .xls report."""
    resurl, code, title, description, keywords, ip = (
        str(v) for v in get_codetitle(url))
    print(url + "|" + code + "|" + title + "|" + ip)
    # xlwt cannot append to an existing sheet, so under the lock we re-open
    # the workbook, copy it, write one new row and save it back.
    with lock:
        word_book = xlrd.open_workbook(report_path)
        work_sheet = word_book.sheet_by_index(0)
        row = work_sheet.nrows
        new_work_book = copy(word_book)
        new_sheet = new_work_book.get_sheet(0)
        for col, value in enumerate(
                (url, resurl, code, title, description, keywords, ip)):
            new_sheet.write(row, col, value)
        new_work_book.save(report_path)


path = os.getcwd()
logging.captureWarnings(True)
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
start = time.time()
lock = threading.Lock()
savefilename = time.strftime("%Y-%m-%d %H.%M.%S")
# BUG FIX: the original concatenated `path + savefilename + '.xls'` with no
# separator, writing a mangled filename into the parent directory.
report_path = os.path.join(path, savefilename + '.xls')

# Create the report workbook with its header row.
myxls = xlwt.Workbook()
sheet1 = myxls.add_sheet(u'title', cell_overwrite_ok=True)
for col, head in enumerate(
        ("源地址", "跳转地址", "状态码", "标题", "描述", "关键词", "IP")):
    sheet1.write(0, col, head)
myxls.save(report_path)

# Normalise url.txt (ip:port entries) into fully-qualified URLs in
# url-run.txt.  os.path.join keeps this portable (the original hard-coded
# Windows-only '\\' separators).
with open(os.path.join(path, "url.txt"), "r") as f:
    raw_lines = f.readlines()
with open(os.path.join(path, "url-run.txt"), "w") as f2:
    for entry in raw_lines:
        entry = entry.strip('\n')
        # BUG FIX: https:// URLs used to get an extra 'http://' prefix,
        # because 'http://' is not a substring of 'https://'.
        if 'http://' not in entry and 'https://' not in entry:
            f2.write('http://' + entry + '\n')
        else:
            f2.write(entry + '\n')

# Load the normalised URL list.
with open(os.path.join(path, 'url-run.txt'), 'r', encoding='utf-8') as f:
    urls_data = [data.strip().strip('\\') for data in f]

# Fan the fetches out over a thread pool (I/O bound, so threads overlap well).
with ThreadPoolExecutor(max_workers=100) as executor:
    for urls in urls_data:
        executor.submit(write, url=urls)

end = time.time()
print("总耗时:", end - start, "秒")

# E-mail completion notice (best effort: SMTP rate limits / expired
# credentials are reported but never abort the run).
try:
    yag = yagmail.SMTP(user="xxxxxx@163.com", password="密码你的",
                       host='smtp.163.com')  # your SMTP account and password
    contents = ['TDK获取时间:%s' % (end - start)]  # message body
    DDOSD_Sender = 'TDK获取完成通知'  # subject line
    receiver = ["xxxx@qq.com"]  # recipient address(es)
    yag.send(receiver, DDOSD_Sender, contents)
    yag.close()
except Exception:
    print('smtp 同个时间发送超过10条或者过期')
纯干货,原创源码,使用方法也很简单:把 py 文件和 url.txt 保存在同一个文件夹即可,运行后会生成 xls 格式的报告文件
这里提供成品软件下载:https://www.123pan.com/s/ePprVv-dPoJ
效果图:
速度很快,大概一分钟能跑几千个网站,准确率绝对没问题,不存在缓存
尊重原创哈,感谢大家!
https://www.123pan.com/s/ePprVv-dPoJ
https://www.123pan.com/s/ePprVv-vPoJ
https://www.123pan.com/s/ePprVv-vPoJ
https://www.123pan.com/s/ePprVv-dPoJ