본문 바로가기

파이썬(PYTHON)

트위터 아이디 수집 (Collecting Twitter IDs) - ver 0.1

728x90
import random

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


# Build a headless Chrome session. Excluding the "enable-logging" switch is
# presumably meant to quiet chromedriver's console output -- confirm.
options = webdriver.ChromeOptions()
options.add_argument("headless")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# NOTE(review): the driver binary path is passed positionally; newer Selenium
# releases expect a Service object instead -- confirm against the installed
# Selenium version before upgrading.
driver = webdriver.Chrome('chromedriver_4844.exe', options=options)

# The implicit wait is a session-wide timeout for element lookups, not a
# pause for page loading, so it is configured once before the first lookup.
driver.implicitly_wait(10)

# Seed page for the crawl. The original script passed `url` to driver.get()
# without ever assigning it, which raised NameError on start-up.
# NOTE(review): this handle is a placeholder in the same
# 'https://twitter.com/<handle>' form the crawl loop navigates to; replace it
# with the account the crawl should start from.
url = 'https://twitter.com/Twitter'
driver.get(url)

def _extract_handle(card_text):
    """Return the account handle found in a card's text, or None.

    The handle is the text between the first '@' and the first '·' that
    follows it, with surrounding whitespace removed. None is returned when
    either marker is missing or nothing is left between them.
    """
    at_pos = card_text.find('@')
    if at_pos == -1:
        return None
    dot_pos = card_text.find('·', at_pos)
    if dot_pos == -1:
        return None
    handle = card_text[at_pos + 1:dot_pos].strip()
    return handle or None


# XPath of the n-th card in the timeline section; '{}' is the 1-based index.
_CARD_XPATH = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[2]/div/div/section/div/div/div[{}]'

# Distinct handles collected so far, in the order they were first seen.
ID = []
# Sorted array view of ID; defined up front so the progress print below never
# hits an unassigned name when the first cards yield no usable handle.
id_uni = np.unique(ID)
# Fast membership test so re-reading the same cards does not grow ID.
_seen_handles = set()

try:
    while True:
        # Scroll two pages down so further cards get rendered.
        body = driver.find_element(By.TAG_NAME, 'body')
        body.send_keys(Keys.PAGE_DOWN)
        body.send_keys(Keys.PAGE_DOWN)
        try:
            for page in range(1, 10):
                card = driver.find_element(By.XPATH, _CARD_XPATH.format(page))
                # Shorter lookup timeout for the following cards.
                driver.implicitly_wait(5)

                handle = _extract_handle(card.text)
                # Keep the original "shorter than 20 characters" filter and
                # skip handles that were already collected.
                if handle is None or len(handle) >= 20 or handle in _seen_handles:
                    continue
                _seen_handles.add(handle)
                ID.append(handle)
                id_uni = np.unique(ID)

                # Every 10th *new* handle, jump to a random collected account.
                # Checking only after an insertion avoids navigating again and
                # again while the count sits on a multiple of 10.
                if len(id_uni) % 10 == 0:
                    # randrange excludes the upper bound, so the index is
                    # always valid and index 0 can be chosen as well.
                    twitter_id = id_uni[random.randrange(len(id_uni))]
                    url = 'https://twitter.com/' + twitter_id
                    print(url)
                    driver.get(url)
                    driver.implicitly_wait(10)

            print(len(id_uni))
        except WebDriverException:
            # A card that is missing or went stale ends this pass; the outer
            # loop scrolls further and tries again. Other exceptions
            # (including Ctrl+C) are no longer swallowed.
            pass
finally:
    # Always shut the browser down, even when the crawl is interrupted.
    driver.quit()
728x90