import os
import time
import random
from urllib.parse import quote
from seleniumwire.utils import decode
from seleniumwire import webdriver
from user_agent import generate_user_agent
from loguru import logger
def selenium_wire_search(shopname, city):
    """Search Dianping's mobile site for a shop and save the captured response.

    Opens the mobile search page for ``shopname`` in ``city`` with a
    Selenium Wire Chrome driver, then writes the decoded body of the first
    usable captured response whose URL contains ``"module"`` to
    ``logs/{city}/{shopname}.json``.

    Args:
        shopname: Search keyword; also used as the output file name.
        city: One of ``"beijing"``, ``"shanghai"``, ``"guangzhou"`` or
            ``"shenzhen"``.

    Returns:
        ``False`` if the output file already exists (nothing is fetched);
        ``True`` once a browser session has run, whether or not a response
        was actually saved.

    Raises:
        KeyError: If ``city`` is not one of the known cities.
    """
    out_dir = f"logs/{city}"
    out_path = f"{out_dir}/{shopname}.json"
    if os.path.exists(out_path):
        print(f"crawled {shopname} {city}")
        return False

    cities = {"beijing": "2", "shanghai": "1", "guangzhou": "4", "shenzhen": "7"}
    # Resolve the city before launching Chrome so that an unknown city fails
    # fast instead of leaving a browser process behind.
    city_id = cities[city]
    shopname_url = quote(shopname, "utf-8")
    url = (
        f"https://m.dianping.com/shoplist/{city_id}/search"
        f"?from=m_search&keyword={shopname_url}"
    )

    options = webdriver.ChromeOptions()
    user_agent = generate_user_agent(device_type="smartphone")
    options.add_argument(f"user-agent={user_agent}")
    # ``options=`` is the supported keyword; ``chrome_options=`` is deprecated.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        for request in driver.requests:
            response = request.response
            if not response or "module" not in request.url:
                continue
            print(response.status_code)
            print(request.params, request.body)
            print(response.headers)
            try:
                encoding = response.headers.get("Content-Encoding")
                if encoding is None:
                    # Responses without a Content-Encoding header are
                    # treated as unusable and skipped.
                    logger.warning("this request is error")
                    continue
                body = decode(response.body, encoding)
                print(len(body))
                # Decode before touching the file so a failure cannot leave
                # an empty file that later runs would mistake for "crawled".
                text = body.decode("utf-8")
                os.makedirs(out_dir, exist_ok=True)
                with open(out_path, "w", encoding="utf-8") as file:
                    file.write(text)
                logger.info(f"save success {shopname} {city}")
                break
            except Exception as err:
                # Best effort: log the failure and try the next response.
                logger.error(err)
    finally:
        # Always shut the browser down, even if navigation or parsing fails.
        driver.quit()
    return True
def main():
    """Crawl every shop listed in ``logs/dianping_shops.txt`` for each city.

    Reads one shop name per line, skipping blank lines, and calls
    ``selenium_wire_search`` for every shop/city pair. After a call that
    returns a truthy value the loop sleeps a random 5-10 seconds; after a
    falsy result it does not wait.
    """
    with open("logs/dianping_shops.txt", "r", encoding="utf-8") as file:
        stripped = (line.strip() for line in file)
        # Blank lines would otherwise trigger a search with an empty keyword.
        shops = [name for name in stripped if name]
    cities = {"beijing": "2", "shanghai": "1", "guangzhou": "4", "shenzhen": "7"}
    for shopname in shops:
        print(shopname)
        for city in cities:
            fetched = selenium_wire_search(shopname, city)
            # Only pause after a real fetch; skipped shops need no delay.
            time_sleep = random.randint(5, 10) if fetched else 0
            print(time_sleep)
            time.sleep(time_sleep)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()