프로그래밍/파이썬

[파이썬] Selenium - WIKIMEDIA 메인페이지 이미지 다운로드 방법

훗티v 2024. 3. 15. 19:35
반응형

이번에는 아래 메인 페이지에 있는 이미지의 src를 추출해서 직접 파일로 다운로드해 볼게요.

 

 

파이썬 코드

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import time

# Launch Chrome, open the Wikimedia Commons main page and read the ``src`` of
# the image inside the "mf-picture-picture" container into ``img_src``.
option = Options()
# option.add_argument('--headless')
option.add_argument('--disable-gpu')
# Chrome's --window-size switch takes "width,height" (comma-separated).
option.add_argument('--window-size=1920,1080')
option.add_argument('--start-maximized')
option.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36')
# option.add_argument('--window-position=-32000,-32000')
chromedriver_version = "114.0.5735.16"  # NOTE: not referenced anywhere in this script

service = Service()
# service.creation_flags = CREATE_NO_WINDOW
driver = uc.Chrome(service=service, options=option)
try:
    driver.minimize_window()

    # The implicit wait is a session-wide setting: every find_element call
    # below keeps retrying for up to 10 seconds before raising, so it only
    # needs to be configured once.
    driver.implicitly_wait(10)

    url = "https://commons.wikimedia.org/wiki/Main_Page"
    driver.get(url)

    # Find the div element with id "mf-picture-picture"
    picture_div = driver.find_element(By.ID, 'mf-picture-picture')

    # Find the img tag within the div
    img_tag = picture_div.find_element(By.TAG_NAME, 'img')

    # Get the src attribute value of the img tag
    img_src = img_tag.get_attribute('src')

    print("이미지 src:", img_src)
finally:
    # Always shut the browser down, even when the page load or an element
    # lookup fails; otherwise the Chrome process is left running.
    driver.quit()

# Download the image at ``img_src`` and save it to "image.jpg" in the current
# working directory. The file name is fixed, whatever format the server returns.

# Wikimedia asks clients to identify themselves; requests carrying a generic
# library User-Agent may be rejected. Replace the contact part with your own.
download_headers = {
    "User-Agent": "WikimediaMainPageImageDownloader/1.0 (tutorial script; add your contact info here)"
}

# Send a GET request to the URL. The timeout (seconds) keeps the script from
# hanging forever if the server never answers.
response = requests.get(img_src, headers=download_headers, timeout=30)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Open a file in binary write mode to save the image
    with open("image.jpg", "wb") as file:
        # Write the content of the response to the file
        file.write(response.content)
    print("'image.jpg' 다운로드 완료.")
else:
    print("다운로드 실패. 에러 코드:", response.status_code)

 

1. Selenium으로 다운로드하려는 이미지의 src를 추출합니다.

2. 추출한 이미지 src를 requests로 요청하여 이미지 파일로 다운로드합니다.

 

 

 

 

 

 

반응형