파이썬으로 로또당첨번호 수집하기
2019. 10. 29. 19:47ㆍ파이썬
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import datetime
import sys
import pandas as pd
from IPython.display import display
import os
def main(base_dir):
    """Collect new lotto draw results and store them in ``lotto.csv``.

    If ``<base_dir>/lotto.csv`` exists it is loaded and collection resumes
    after the largest value in its '회차' column; otherwise collection starts
    at draw 1 with an empty table. Draw pages are fetched one by one (at most
    999 per run) until a page does not list the requested draw. The resulting
    table is printed and written back to the same CSV file, even when a fetch
    or parse error interrupts the loop, so rows already collected are kept.

    Args:
        base_dir: Directory that holds (or will hold) ``lotto.csv``.
    """
    csv_path = os.path.join(base_dir, 'lotto.csv')
    print(base_dir)

    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path, index_col=0, parse_dates=['날짜'])
        last_turn = df['회차'].max()
        # max() of a CSV with no rows is NaN, which range() cannot use.
        last_turn = 0 if pd.isna(last_turn) else int(last_turn)
    else:
        df = pd.DataFrame(
            columns=['회차', '날짜', '1', '2', '3', '4', '5', '6', '7'])
        last_turn = 0

    # NOTE(review): this private ssl helper turns off certificate
    # verification; kept as in the original, but confirm it is really needed.
    context = ssl._create_unverified_context()

    try:
        for turn in range(last_turn + 1, last_turn + 1000):
            print(turn)
            # NOTE(review): the URL looks like a redacted placeholder; the
            # real endpoint has to be filled in before this can run.
            url = 'https://URL?PARAMATER=%d' % turn
            with urlopen(url, context=context) as response:
                html = response.read()
            # Parse after the connection is closed; only the bytes are needed.
            row = _parse_draw(BeautifulSoup(html, 'html.parser'), turn)
            if row is None:
                print(turn, '회는 없습니다')
                break
            df.loc[turn] = row
    finally:
        # Save whatever was collected, even if a request or parse failed.
        print(df)
        df.to_csv(csv_path, mode='w')


def _parse_draw(soup, turn):
    """Build the table row for draw ``turn`` from a parsed page, or ``None``.

    Returns ``None`` when the page has no ``bx_lotto_winnum`` block or no
    ``<option>`` whose value is ``turn``; the caller treats that as "this draw
    is not available". Otherwise returns ``[turn, date, ...]`` followed by the
    text of every ``<span>`` in the winning-number block, minus the '+'
    separator.
    """
    div = soup.find('div', {'class': 'bx_lotto_winnum'})
    option = soup.find('option', {'value': turn})
    if div is None or option is None:
        return None

    label = option.get_text()
    # The date sits right after '(' and is sliced as a fixed 13 characters,
    # matching the 'YYYY년 MM월 DD일' layout the format string expects.
    start = label.find('(') + 1
    date = datetime.datetime.strptime(
        label[start:start + 13], '%Y년 %m월 %d일')

    number = [turn, date]
    # Skip the '+' span instead of list.remove(), which raises if it is absent.
    number.extend(
        text for text in (tag.get_text() for tag in div.find_all('span'))
        if text != '+')
    return number
if __name__ == "__main__":
    # Run as a script: keep lotto.csv in the current working directory.
    working_dir = os.getcwd()
    main(working_dir)
from bs4 import BeautifulSoup
import ssl
import datetime
import sys
import pandas as pd
from IPython.display import display
import os
def main(base_dir):
    """Collect new lotto draw results and store them in ``lotto.csv``.

    If ``<base_dir>/lotto.csv`` exists it is loaded and collection resumes
    after the largest value in its '회차' column; otherwise collection starts
    at draw 1 with an empty table. Draw pages are fetched one by one (at most
    999 per run) until a page does not list the requested draw. The resulting
    table is printed and written back to the same CSV file, even when a fetch
    or parse error interrupts the loop, so rows already collected are kept.

    Args:
        base_dir: Directory that holds (or will hold) ``lotto.csv``.
    """
    csv_path = os.path.join(base_dir, 'lotto.csv')
    print(base_dir)

    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path, index_col=0, parse_dates=['날짜'])
        last_turn = df['회차'].max()
        # max() of a CSV with no rows is NaN, which range() cannot use.
        last_turn = 0 if pd.isna(last_turn) else int(last_turn)
    else:
        df = pd.DataFrame(
            columns=['회차', '날짜', '1', '2', '3', '4', '5', '6', '7'])
        last_turn = 0

    # NOTE(review): this private ssl helper turns off certificate
    # verification; kept as in the original, but confirm it is really needed.
    context = ssl._create_unverified_context()

    try:
        for turn in range(last_turn + 1, last_turn + 1000):
            print(turn)
            # NOTE(review): the URL looks like a redacted placeholder; the
            # real endpoint has to be filled in before this can run.
            url = 'https://URL?PARAMATER=%d' % turn
            with urlopen(url, context=context) as response:
                html = response.read()
            # Parse after the connection is closed; only the bytes are needed.
            row = _parse_draw(BeautifulSoup(html, 'html.parser'), turn)
            if row is None:
                print(turn, '회는 없습니다')
                break
            df.loc[turn] = row
    finally:
        # Save whatever was collected, even if a request or parse failed.
        print(df)
        df.to_csv(csv_path, mode='w')


def _parse_draw(soup, turn):
    """Build the table row for draw ``turn`` from a parsed page, or ``None``.

    Returns ``None`` when the page has no ``bx_lotto_winnum`` block or no
    ``<option>`` whose value is ``turn``; the caller treats that as "this draw
    is not available". Otherwise returns ``[turn, date, ...]`` followed by the
    text of every ``<span>`` in the winning-number block, minus the '+'
    separator.
    """
    div = soup.find('div', {'class': 'bx_lotto_winnum'})
    option = soup.find('option', {'value': turn})
    if div is None or option is None:
        return None

    label = option.get_text()
    # The date sits right after '(' and is sliced as a fixed 13 characters,
    # matching the 'YYYY년 MM월 DD일' layout the format string expects.
    start = label.find('(') + 1
    date = datetime.datetime.strptime(
        label[start:start + 13], '%Y년 %m월 %d일')

    number = [turn, date]
    # Skip the '+' span instead of list.remove(), which raises if it is absent.
    number.extend(
        text for text in (tag.get_text() for tag in div.find_all('span'))
        if text != '+')
    return number
if __name__ == "__main__":
    # Run as a script: keep lotto.csv in the current working directory.
    working_dir = os.getcwd()
    main(working_dir)
'파이썬' 카테고리의 다른 글
판다스 index 초기화 (0) | 2020.12.31 |
---|---|
shlex 문자열안의 따옴표 구분하기 (0) | 2020.12.31 |
2차배열 정렬 (0) | 2020.12.27 |
list 순서 뒤집기 (0) | 2020.12.27 |
스마트폰에 파이썬 설치하기 (0) | 2019.10.23 |