#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: set fileencoding=utf-8 :
import sys
# Python 2 hack: make UTF-8 the default string encoding so that printing the
# accented series titles does not raise UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding('utf8')
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.webdriver.common.by import By
print sys.stdout.encoding
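# Overall flow: collect every series from the left-hand menu, visit each series
# page to list its episodes, then visit each episode to gather its mirror links
# and resolve them to the final download URLs.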
 
#browser = webdriver.Firefox() # Get local session of firefox
browser = webdriver.Chrome() # Get local session of chrome
browser.get("http://www.seriesyonkis.com/ultimos-capitulos.php") # Load page
 
 
series=[]
#extract all series
for elemento in browser.find_elements_by_class_name("leftmenugroup")[0].find_elements_by_xpath(".//a"):
	href = elemento.get_attribute("href")
	series.append([href, elemento.text])
	print href + " " + elemento.text
print series 
#series.append(["http://www.seriesyonkis.com/serie/zen/","ZEN"])
#series.append(["http://www.seriesyonkis.com/serie/zoey-101/","Zoey 101"])
 
for serie in series:
	#print "analyzing " + serie[0] + " " + serie[1]
	capitulos = []
	# Skip the menu entries that are not series ("Series:" header and the films link).
	if serie[1] != "Series:" and serie[1] != "películas aquí":
		browser.get(serie[0])
		# Every episode of the series is listed inside an <h5> element.
		for elemento in browser.find_elements_by_tag_name("h5"):
			enlaces = elemento.find_elements_by_xpath(".//a")
			if enlaces:
				capitulos.append([enlaces[0].get_attribute("href"), enlaces[0].text])
		print capitulos
		for capitulo in capitulos:
			browser.get(capitulo[0])
			# Each table row describes one mirror: td[2] language, td[3] subtitles,
			# td[5] information (duration/format/size), plus an anchor to the link page.
			links = []
			for row in browser.find_elements_by_tag_name("tr"):
				celdas = row.find_elements_by_xpath(".//td")
				if len(celdas) < 6:
					continue  # header or malformed rows lack the expected columns
				for j in row.find_elements_by_xpath(".//a"):
					links.append([celdas[2].text, celdas[3].text, celdas[5].text, j.get_attribute("href")])
 
			#print links
			# Follow each intermediate link page and extract the final download URL;
			# the anchor sits at one of a few known positions depending on the host page.
			correctlinks = []
			xpaths_finales = [
				"/html/body/span/b/a",
				"/html/body/center[2]/table/tbody/tr/td/center/h3[1]/a",
				"/html/body/ol/li/span[2]/b/a",
			]
			for z in links:
				browser.get(z[3])
				for xpath in xpaths_finales:
					for url in browser.find_elements_by_xpath(xpath):
						correctlinks.append([z[0], z[1], z[2], url.get_attribute("href")])
 
			# Print every resolved link for this episode: title plus [language, subtitles, information, final URL].
			for y in correctlinks:
				print capitulo[1] + " " + str(y)
			print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
 
browser.quit() # quit() closes the window and shuts down the chromedriver process