본문 바로가기
브라우저 자동화&크롤링/python+selenium

국가기록원 기록물 뷰어 JPG 다운로드 by 파이썬

by 일코 2021. 1. 25.
from time import sleep

import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
# Start Chrome through the local chromedriver binary and open the National
# Archives of Korea integrated search results; the URL-encoded query at the
# end of the address decodes to "광업" (mining).
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; recent Selenium 4 releases expect a Service object instead --
# confirm against the installed Selenium version.
driver = webdriver.Chrome(r"C:\Users\User\PycharmProjects\card_pub\chromedriver.exe")
driver.get(r'http://www.archives.go.kr/next/search/searchTotalUp.do?totalSearchType=1&upside_query=%EA%B4%91%EC%97%85')

# The locators below are absolute XPaths, so they depend on the exact layout
# of the results page. find_element(By.XPATH, ...) is used rather than
# find_element_by_xpath(), which newer Selenium releases no longer provide.

# Click the "photos / film" category link.
driver.find_element(By.XPATH, '/html/body/div/div/div[7]/div[1]/ol/li[4]/a').click()
# Pick the "100 per page" entry of the page-size dropdown, then click the
# input next to it (presumably the button that applies the choice -- verify).
driver.find_element(By.XPATH, '/html/body/div/div/div[7]/div[2]/div/div/fieldset/form/div/div[2]/div[2]/select/option[5]').click()
driver.find_element(By.XPATH, '/html/body/div/div/div[7]/div[2]/div/div/fieldset/form/div/div[2]/div[2]/input').click()

# Walk the result pages: download every image linked from the page currently
# shown, then ask the site for the next page via its own rePage() script.
# NOTE(review): the final rePage() call loads a page whose links are never
# processed; widen the range if that page should be downloaded as well.
for page in range(2, 4):
    # Collect the links of the result page that is displayed right now. This
    # must be repeated on every pass: a list built once before the loop would
    # replay the first page's links after each rePage() call.
    pic_list = [
        link.get_attribute('href')
        for link in driver.find_elements(By.CSS_SELECTOR, 'a[title="새창으로 열림"]')
    ]
    for url in pic_list:
        try:
            driver.get(url)
        except selenium.common.exceptions.WebDriverException as e:
            # Report the failing link and move on to the next one.
            print(e)
            continue
        sleep(1)
        # Call the page's own save_image() script to trigger a direct download;
        # the code below expects it to open a second browser window.
        driver.execute_script('save_image("C")')
        download_popup = driver.window_handles[1]
        driver.switch_to.window(download_popup)
        # Pause before closing the popup (presumably so the download can start).
        sleep(3)
        driver.close()
        # Return to the main window and step back to the result list.
        driver.switch_to.window(driver.window_handles[0])
        driver.back()
    driver.execute_script(f'rePage({page})')
    # Pause before the next pass (presumably so the new page can load).
    sleep(3)

댓글