本文共 4710 字,大约阅读时间需要 15 分钟。
&&&&&
# -*- coding: utf-8 -*-
"""Scrape a company's automatic pollution-monitoring records from the
Beijing monitoring portal over a date range and export them to an .xls file.

Ported to Python 3 (the original relied on ``reload(sys)`` /
``sys.setdefaultencoding`` and ``print`` statements, which no longer exist).
"""
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import re
import time
import datetime

# Column headers for the output spreadsheet; order matches the scraped columns.
HEADERS = [u'序号', u'监测点位', u'监测时间', u'监测项目', u'监测结果',
           u'标准限值', u'单位', u'是否达标', u'超标倍数', u'评价标准',
           u'排放去向', u'排放方式', u'备注']

# Cell indexes 8 (超标倍数) and 12 (备注) are frequently empty on the site;
# all other cells must be present for a row to count as data.
_OPTIONAL_CELLS = (8, 12)


def _post_with_retry(url, fromdata, retry_delay=2):
    """POST the search form, retrying once after a pause on network errors.

    A timeout is set so a dead server cannot hang the scrape forever
    (the original had none).
    """
    try:
        return requests.post(url, data=fromdata, timeout=30)
    except requests.RequestException:
        time.sleep(retry_delay)
        return requests.post(url, data=fromdata, timeout=30)


def shuju(url, date, page):
    """提取指定公司,指定日期、指定页面的自动监测数据

    Return a list of 13-element string rows scraped from one result page.
    Rows missing a required cell (e.g. the header row) are skipped.
    """
    fromdata = {
        "startTime": date,
        "pageIndex": page,
    }
    response = _post_with_retry(url, fromdata)
    soup = BeautifulSoup(response.text, 'lxml')
    datalist = []
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        data = []
        valid = True
        for i in range(13):
            try:
                # .split()[0] trims the surrounding whitespace/newlines.
                data.append(cells[i].text.split()[0])
            except IndexError:
                if i in _OPTIONAL_CELLS:
                    data.append('')  # optional cell absent or empty
                else:
                    valid = False    # required cell missing: not a data row
                    break
        if valid:
            datalist.append(data)
    return datalist


def pageNumber(url, date):
    """返回公司指定日期自动监测数据的页数,便于for循环遍历

    Return ``(page_count, company_name)``; ``page_count`` is a numeric
    string (callers pass it through ``int``).
    """
    fromdata = {
        "startTime": date,
        "pageIndex": "",
    }
    response = _post_with_retry(url, fromdata, retry_delay=1)
    soup = BeautifulSoup(response.text, 'lxml')
    # The pager text looks like "1/12"; take the full number after the slash.
    # BUG FIX: the original used split('/')[1][0], which kept only the first
    # digit and silently dropped pages whenever there were more than 9.
    pager = soup.find('span', class_="clr_b ver_mid").string
    number = re.search(r'/\s*(\d+)', pager).group(1)
    compname = soup.find('div', class_="com_tit_new f_22 clr_3").string
    return number, compname


def Date_list_generation(start, end):
    """生成指定日期段的一个列表

    Return the dates strictly after ``start`` up to and including ``end``
    as 'YYYY-MM-DD' strings (same half-open behaviour as the original).
    """
    datestart = datetime.datetime.strptime(str(start), '%Y-%m-%d')
    dateend = datetime.datetime.strptime(str(end), '%Y-%m-%d')
    datelist = []
    while datestart < dateend:
        datestart += datetime.timedelta(days=1)
        datelist.append(datestart.strftime('%Y-%m-%d'))
    return datelist


def pao(start, end, url):
    """Scrape every page for every date in (start, end] and save to .xls.

    The output file is named ``<company>_<start>_<end>.xls``.
    """
    book = Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('Sheet 1')
    for col, title in enumerate(HEADERS):
        sheet1.write(0, col, title)

    datalistnew = []
    compname = ''  # BUG FIX: original left this unbound for an empty range
    for date in Date_list_generation(start, end):
        pagenumber, compname = pageNumber(url, date)
        for page in range(1, int(pagenumber) + 1):
            try:
                datalist = shuju(url, date, page)
                print(date, page)
            except Exception as exc:
                # BUG FIX: the original's except path went on to use a
                # possibly-undefined ``datalist``; log and skip the page.
                print(page, exc)
                datalist = []
            datalistnew.extend(datalist)
            time.sleep(0.8)  # be polite to the server between requests

    # Write data rows below the header (row 0).
    for rownum, rowdata in enumerate(datalistnew, start=1):
        for col, value in enumerate(rowdata):
            sheet1.write(rownum, col, value)

    tablename = "%s_%s_%s.xls" % (compname, start, end)
    book.save(tablename)


if __name__ == "__main__":
    start = "2017-05-01"
    end = "2017-06-01"
    url = "http://58.30.229.134/monitor-pub/org_zdjc/e3e8b6b7-578a-4982-93bf-5484c49b3e5e.do"
    pao(start, end, url)
&&&&&
转载地址:http://ifpta.baihongyu.com/