파이썬으로 로또당첨번호 수집하기

2019. 10. 29. 19:47 · 파이썬

from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import datetime
import sys
import pandas as pd
from IPython.display import display
import os

   
def _parse_draw_row(soup, turn):
    """Extract one CSV row for draw ``turn`` from a parsed result page.

    Args:
        soup: Parsed result page (a BeautifulSoup document).
        turn: Draw number that was requested.

    Returns:
        ``[turn, date, *span_texts]`` where ``span_texts`` are the texts of
        the ``span`` tags inside the ``bx_lotto_winnum`` div with the ``'+'``
        separator removed, or ``None`` when the page has no such div or no
        ``option`` element whose value is ``turn``.

    Raises:
        ValueError: If the date text inside the option does not match
            ``'%Y년 %m월 %d일'``.
    """
    winnum_div = soup.find('div', {'class': 'bx_lotto_winnum'})
    option_tag = soup.find('option', {'value': turn})
    # Explicit None checks instead of relying on AttributeError: a missing
    # div used to escape the old try block and crash the whole run.
    if winnum_div is None or option_tag is None:
        return None

    option_text = option_tag.get_text()
    # The date is read from the 13 characters that follow the first '(',
    # i.e. the width of a zero-padded 'YYYY년 MM월 DD일'.
    start = option_text.find('(') + 1
    date = datetime.datetime.strptime(option_text[start:start + 13], '%Y년 %m월 %d일')

    balls = [tag.get_text() for tag in winnum_div.find_all('span')]
    # Drop the '+' separator span if present; a page without it must not
    # raise ValueError.
    if '+' in balls:
        balls.remove('+')
    return [turn, date, *balls]


def main(base_dir):
    """Fetch lotto draws that are not yet in ``<base_dir>/lotto.csv`` and save them.

    Loads the existing CSV when there is one, continues from the draw after
    the largest stored ``회차``, requests up to 999 further draws one by one
    and stops at the first draw the page does not list. The resulting table
    is printed and written back to the same CSV file.

    Args:
        base_dir: Directory that holds (or will hold) ``lotto.csv``.

    Raises:
        urllib.error.URLError: If a request fails. Rows collected before the
            failure are still written to the CSV.
        ValueError: If a draw date cannot be parsed.
    """
    csv_path = os.path.join(base_dir, 'lotto.csv')
    print(base_dir)

    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path, index_col=0, parse_dates=['날짜'])
        newest = df['회차'].max()
        # A CSV with a header but no rows yields NaN, which range() rejects.
        last_turn = 0 if pd.isna(newest) else int(newest)
    else:
        df = pd.DataFrame(columns=['회차', '날짜', '1', '2', '3', '4', '5', '6', '7'])
        last_turn = 0

    # NOTE(review): this is a private ssl helper that, as its name says,
    # builds a context without certificate verification. Kept as-is to avoid
    # changing which servers the script can talk to; prefer
    # ssl.create_default_context() if the target site has a valid certificate.
    context = ssl._create_unverified_context()

    try:
        for turn in range(last_turn + 1, last_turn + 1000):
            print(turn)
            # NOTE(review): the URL looks like a redacted placeholder from the
            # original post; it has to be replaced with the real endpoint.
            url = 'https://URL?PARAMATER=%d' % turn
            with urlopen(url, context=context) as response:
                html = response.read()

            soup = BeautifulSoup(html, 'html.parser')
            row = _parse_draw_row(soup, turn)
            if row is None:
                print(turn, '회는 없습니다')
                break
            df.loc[turn] = row
    finally:
        # Save even when a request or parse step fails part-way, so draws
        # that were already fetched are not lost.
        print(df)
        df.to_csv(csv_path, mode='w')
   
# Script entry point: collect into lotto.csv in the current working directory.
if __name__ == '__main__':
    working_dir = os.getcwd()
    main(working_dir)

'파이썬' 카테고리의 다른 글

판다스 index 초기화  (0) 2020.12.31
shlex 문자열안의 따옴표 구분하기  (0) 2020.12.31
2차배열 정렬  (0) 2020.12.27
list 순서 뒤집기  (0) 2020.12.27
스마트폰에 파이썬 설치하기  (0) 2019.10.23