Python 爬虫实践:爬取链家网二手房数据
最编程
2024-06-03 17:01:10
...
for page in range(1, 101):
print('===========================正在下载第{}页数据================================'.format(page))
time.sleep(1)
url = 'https://cs.lianjia.com/ershoufang/pg{}/'.format(page)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
response = requests.get(url=url, headers=headers)
selector = parsel.Selector(response.text)
lis = selector.css('.sellListContent li')
dit = {}
for li in lis:
title = li.css('.title a::text').get()
dit['标题'] = title
positionInfo = li.css('.positionInfo a::text').getall()
info = '-'.join(positionInfo)
dit['开发商'] = info
houseInfo = li.css('.houseInfo::text').get()
dit['房子信息'] = houseInfo
followInfo = li.css('.followInfo::text').get()
dit['发布周期'] = followInfo
Price = li.css('.totalPrice span::text').get()
dit['售价/万'] = Price
unitPrice = li.css('.unitPrice span::text').get()
dit['单价'] = unitPrice
csv_writer.writerow(dit)
print(dit)
print('===========================正在下载第{}页数据================================'.format(page))
time.sleep(1)
url = 'https://cs.lianjia.com/ershoufang/pg{}/'.format(page)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
response = requests.get(url=url, headers=headers)
selector = parsel.Selector(response.text)
lis = selector.css('.sellListContent li')
dit = {}
for li in lis:
title = li.css('.title a::text').get()
dit['标题'] = title
positionInfo = li.css('.positionInfo a::text').getall()
info = '-'.join(positionInfo)
dit['开发商'] = info
houseInfo = li.css('.houseInfo::text').get()
dit['房子信息'] = houseInfo
followInfo = li.css('.followInfo::text').get()
dit['发布周期'] = followInfo
Price = li.css('.totalPrice span::text').get()
dit['售价/万'] = Price
unitPrice = li.css('.unitPrice span::text').get()
dit['单价'] = unitPrice
csv_writer.writerow(dit)
print(dit)
上一篇: (干)数据分析案例--以上海二手房为例
下一篇: Python-递归