"""Save chapters of a NovelUpdates series to local text files.

Walks the series' release listing from page 25 down to page 2.  For every
release link whose href contains ``extnu`` and whose title starts with
``c``/``C``, the link is followed and the chapter body is written to
``<title>.txt`` in the current directory.  Titles that already have a file
are skipped.  The hosting site is recognised from the ``<link>`` tags of the
followed page; only the sites handled below are saved.
"""
import os.path
import pprint as pp  # not used by the live code; handy for dumping page.content

import cloudscraper
from bs4 import BeautifulSoup as bs

scraper = cloudscraper.create_scraper()

# Loop invariants: where the release listing lives.
baseURL = 'https://www.novelupdates.com/'
series = 'lazy-dungeon-master'

# Markers looked for in the followed page's <link> tags to identify the host.
MOONBUNNY_MARKER = 'http://moonbunnycafe.com/wp-content/uploads/2015/07/bunbun.jpeg'
CORONA_MARKER = 'https://coronatranslation.blogspot.com/favicon.ico'
# NOTE(review): "ยป" looks like a mis-decoded "»"; the literal is kept exactly
# as it was -- confirm against the real page before changing it.
ZIRU_FEED_TITLE = "Ziru's Musings ยป Feed"

# Exact class attribute values of the <div> holding the chapter text.
CORONA_STORY_CLASS = 'post-body entry-content float-container'
ZIRU_STORY_CLASS = ('elementor-element elementor-element-7ba99198 '
                    'elementor-widget elementor-widget-theme-post-content')


def _first_link_href(soup):
    """Return the href of the first ``<link href=...>`` tag, or '' if none."""
    link = soup.find('link', href=True)
    return '' if link is None else link['href']


def _save_story(title, story):
    """Write ``story``'s text to ``<title>.txt``.

    Returns True when the file was written, False when ``story`` is None
    (the expected container was not found on the page).
    """
    if story is None:
        return False
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(title + '.txt', 'w', encoding='utf-8') as f:
        f.write(story.get_text())
    return True


pg = 25
# NOTE(review): the bound stops before page 1, so ?pg=1 is never fetched;
# kept as in the original -- confirm whether that is intended.
while pg > 1:
    # Build the URL once so the request and the log line always agree
    # (the request previously used a doubled slash before "series").
    listingURL = baseURL + 'series/' + series + '?pg=' + str(pg)
    page = scraper.get(listingURL)
    print(listingURL)
    soup = bs(page.content, 'html.parser')

    result = soup.find(id='myTable')
    if result is None:
        # No release table in the response; skip the page instead of crashing.
        print('no release table found on page', pg)
        links = []
    else:
        links = result.find_all('a', href=True, title=True)

    for a in links:
        if 'extnu' not in a['href']:
            continue

        title = a['title']
        if os.path.exists(title + '.txt'):
            print('skipping', title)
            continue
        # Slice instead of index so an empty title cannot raise IndexError.
        if title[:1].lower() != 'c':
            continue

        # The href is protocol-relative ("//host/path/"); drop the
        # surrounding slashes and prepend a scheme.
        target = a['href'].strip('/')
        print(title)
        print(target)
        followURL = 'http://' + target
        followPage = scraper.get(followURL)
        followSoup = bs(followPage.content, 'html.parser')
        firstHref = _first_link_href(followSoup)

        if MOONBUNNY_MARKER in firstHref:
            # NOTE(review): this abandons the rest of the current listing
            # page, exactly as the original did -- confirm it is not meant
            # to be ``continue``.
            break
        elif CORONA_MARKER in firstHref:
            story = followSoup.find('div', class_=CORONA_STORY_CLASS)
            if not _save_story(title, story):
                print('no story body found for', title)
        elif followSoup.find('link', title=ZIRU_FEED_TITLE) is not None:
            story = followSoup.find('div', class_=ZIRU_STORY_CLASS)
            if not _save_story(title, story):
                print('fuck')
        else:
            # NOTE(review): the original indentation was lost; this fallback
            # is assumed to belong to the site-detection chain (unknown host).
            print('junk')

    pg = pg - 1