Get all tags in between h2 tags
# Write to `filename` the text of the nodes that sit between the heading whose
# text equals `title` and the next heading of the same level.
#
# `response`, `id`, `title`, `filename` and `self` come from the enclosing
# scope, which is not part of this fragment.
# NOTE(review): the query below uses the bare name `id` while the loop tests
# `self.id` -- confirm both refer to the same value.
# NOTE(review): `id` and `title` are spliced into the XPath text unescaped; a
# value containing a single quote will produce an invalid expression.

# Every element that shares its tag name with the element carrying the id.
name_query = u"//*[name()=name(//*[@id='" + id + "'])]"
# Serialise once instead of calling getall() again on every iteration.
same_tag_markup = response.xpath(name_query).getall()

# All candidates share one tag name, so the first one mentioning the id is
# enough to learn it.
balise = None
for markup in same_tag_markup:
    if self.id in markup:
        # '<h2 id="x">Title</h2>' -> 'h2'. The previous split("") always
        # raised ValueError (empty separator).
        balise = markup.split(">", 1)[0].lstrip("<").split()[0]
        break

if balise is None:
    self.log('no element found for id ' + self.id)
else:
    query = u"//*[preceding-sibling::" + balise + "[1] ='" + title + "' and following-sibling::" + balise + "]"
    self.log('query = ' + query)
    results = response.xpath(query)
    # The original drops the last match (presumably the next heading itself
    # -- TODO confirm); guard so an empty result no longer raises IndexError.
    if results:
        results.pop()
    with open(filename, 'wb') as f:
        for text in results.css("::text").getall():
            # The file is opened in binary mode: the newline must be bytes,
            # otherwise bytes + str raises TypeError on Python 3.
            f.write(text.encode('utf-8') + b"\n")
|