본문 바로가기
WEB/Python

Requests와 re 모듈을 이용한 웹 크롤러로 로또 번호 크롤링하기

by sit_min 2019. 1. 13.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import requests
import re
 
# Result page for a single draw; the draw number is appended to this prefix.
LOTTO_RESULT_URL = 'https://www.dhlottery.co.kr/gameResult.do?method=byWin&drwNo='

# The numbers are read from the text between these two markers in the page.
SECTION_START_MARKER = "num win"
SECTION_END_MARKER = "tbl_data tbl_data_col"

# A run of digits that is the entire text between '>' and '<'.
NUMBER_PATTERN = re.compile(r'>([0-9]+)<')

# Deletes tabs and line breaks so digits split from their tags by layout
# whitespace are still matched by NUMBER_PATTERN.
_LAYOUT_WHITESPACE = str.maketrans('', '', '\t\n\r')

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT_SECONDS = 10

start_number = 262  # First draw number to crawl (inclusive).
end_number = 265  # Draw number at which crawling stops (exclusive: not fetched).

total_LOTTO_number_list = []


def extract_lotto_numbers(page_source):
    """Return the digit strings found in the result section of *page_source*.

    The section runs from the first "num win" marker up to the next
    "tbl_data tbl_data_col" marker. Inside it, tabs and line breaks are
    removed and every run of digits enclosed directly by '>' and '<' is
    collected, in document order, as a string.

    An empty list is returned when either marker is missing, instead of
    slicing with the -1 that ``str.find`` reports for "not found".
    """
    section_start = page_source.find(SECTION_START_MARKER)
    if section_start == -1:
        return []
    section_end = page_source.find(SECTION_END_MARKER, section_start)
    if section_end == -1:
        return []

    section = page_source[section_start:section_end].translate(_LAYOUT_WHITESPACE)
    return NUMBER_PATTERN.findall(section)


def fetch_lotto_numbers(draw_number):
    """Download the result page of one draw and return its numbers as strings.

    Raises ``requests.RequestException`` on a timeout, a connection problem
    or an HTTP error status, rather than parsing an error page.
    """
    response = requests.get(
        LOTTO_RESULT_URL + str(draw_number),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    # Use the decoded text rather than str(bytes), which would be the repr
    # of the bytes object with escaped whitespace.
    return extract_lotto_numbers(response.text)


def crawl_lotto_numbers(first_draw, stop_draw):
    """Return one list of numbers per draw in ``range(first_draw, stop_draw)``."""
    return [
        fetch_lotto_numbers(draw_number)
        for draw_number in range(first_draw, stop_draw)
    ]


if __name__ == "__main__":
    total_LOTTO_number_list.extend(crawl_lotto_numbers(start_number, end_number))
    print(total_LOTTO_number_list)
cs