// Selenium nefunguje so znakom & v URL - python, selenium, web scraping

Selenium nefunguje so znakom & v URL - python, selenium, web scraping

Prechádzam (scrapujem) túto adresu URL:

https://www.youtube.com/trendsdashboard#loc0=ind

Vyššie uvedené funguje dobre, ale pre tú istú stránku s viac ako jedným parametrom v URL (oddeleným znakom &), t. j.

https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared

dáva to prázdny výsledok. To isté platí pre:

https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=male https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=female .

def getVideoTrend(self):
    """Open the YouTube Trends dashboard in Firefox and print its videos.

    Loads the hard-coded dashboard URL, checks the page title, then reads
    the ``href`` and ``alt`` attributes of the first ``<a>`` inside every
    element with class ``video-item``. Each matched element is printed as
    it is visited, and the collected list of ``{"url", "title"}`` dicts is
    printed at the end.

    Raises:
        AssertionError: if "YouTube Trends" is not part of the page title.
    """
    binary = FirefoxBinary("/usr/bin/firefox")
    driver = webdriver.Firefox(firefox_binary=binary)
    try:
        driver.get("https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared")
        assert "YouTube Trends" in driver.title
        video_trend = []
        # NOTE(review): the elements are queried right after get() with no
        # explicit wait; if the page fills in its items asynchronously this
        # list may still be empty at this point -- confirm against the page.
        for s in driver.find_elements_by_class_name("video-item"):
            print(s)
            # Look the anchor up once and read both attributes from it.
            link = s.find_element_by_css_selector("a")
            video_trend.append({
                "url": link.get_attribute("href"),
                "title": link.get_attribute("alt"),
            })
        print(video_trend)
    finally:
        # Close the browser even when the title check or a lookup fails.
        driver.quit()

odpovede:

1 pre odpoveď č. 1

Problém môže byť v tom, že nečakáte na načítanie prvkov do DOM.

Skúste urobiť niečo také:

Importujte WebDriverWait zo Selenia:

from selenium.webdriver.support.ui import WebDriverWait

Pred získaním prvkov počkajte, kým sa načítajú.

# Poll for up to 10 seconds until the lookup returns a non-empty list
# (an empty list is falsy, so until() keeps retrying until the timeout).
elements = WebDriverWait(driver, 10).until(
    lambda d: d.find_elements_by_class_name("video-item")
)
for s in elements:
    print(s.text)
    # Look the anchor up once and read both attributes from it.
    link = s.find_element_by_css_selector("a")
    video_trend.append({
        "url": link.get_attribute("href"),
        "title": link.get_attribute("alt"),
    })
print(video_trend)

Tento kód mi funguje:

# Module-level Firefox session; get_video_trend() below reads this global.
driver = webdriver.Firefox()

class Test(object):
    """Dashboard URLs to try, with one, two and three fragment parameters."""

    url1 = "https://www.youtube.com/trendsdashboard#loc0=ind"
    url2 = "https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared"
    url3 = "https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=male"

def get_video_trend(url):
    """Load *url* in the shared browser and collect its ``video-item`` links.

    Waits up to 10 seconds for at least one element with class
    ``video-item`` to be found, prints the text of each one, and prints the
    collected list at the end.

    Args:
        url: Address of the trends dashboard page to open.

    Returns:
        A list of ``{"url": ..., "title": ...}`` dicts, one per matched
        element, taken from the ``href`` and ``alt`` attributes of the
        element's first ``<a>``.

    Raises:
        AssertionError: if "YouTube Trends" is not part of the page title.
    """
    driver.get(url)
    assert "YouTube Trends" in driver.title
    video_trend = []
    # An empty result list is falsy, so until() keeps polling until the
    # lookup finds something or the 10 second timeout expires.
    items = WebDriverWait(driver, 10).until(
        lambda d: d.find_elements_by_class_name("video-item")
    )
    for s in items:
        print(s.text)
        # Look the anchor up once and read both attributes from it.
        link = s.find_element_by_css_selector("a")
        video_trend.append({
            "url": link.get_attribute("href"),
            "title": link.get_attribute("alt"),
        })
    print(video_trend)
    return video_trend

if __name__ == "__main__":
    # Run all three URLs, and close the browser even if one of them fails.
    try:
        get_video_trend(Test.url1)
        get_video_trend(Test.url2)
        get_video_trend(Test.url3)
    finally:
        driver.quit()