45 lines
1.9 KiB
Python
45 lines
1.9 KiB
Python
import requests
|
|
import pprint as pp
|
|
from bs4 import BeautifulSoup as bs
|
|
|
|
# Walk the paginated listing of one series on novelupdates.com, follow every
# "extnu" link found in the element with id "myTable", and save the text of
# each chapter hosted on a supported site to "<link title>.txt" in the
# current working directory.

BASE_URL = 'https://www.novelupdates.com'
SERIES = 'lazy-dungeon-master'
FIRST_PAGE = 27  # listing page the countdown starts from
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever

# Markers used to recognise the two supported hosting sites.
CORONA_FAVICON = 'https://coronatranslation.blogspot.com/favicon.ico'
CORONA_STORY_CLASS = 'post-body entry-content float-container'
ZIRU_FEED_TITLE = "Ziru's Musings » Feed"
ZIRU_STORY_CLASS = (
    'elementor-element elementor-element-7ba99198 '
    'elementor-widget elementor-widget-theme-post-content'
)

# Characters that are path separators or rejected by common file systems.
_UNSAFE_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def _fetch_soup(url, params=None):
    """Download *url* and return its body parsed with ``html.parser``.

    Raises whatever ``requests.get`` raises (connection errors, timeouts).
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    return bs(response.content, 'html.parser')


def _safe_filename(title):
    """Return ``title + ".txt"`` with unsafe file-name characters replaced by "_"."""
    return title.translate(_UNSAFE_FILENAME_CHARS) + '.txt'


def _extract_story(chapter_soup):
    """Return the chapter text found via *chapter_soup*, or None.

    None is returned when the page belongs to neither supported site or when
    the expected content element is missing from the page.
    """
    first_link = chapter_soup.find('link', href=True)
    if first_link is not None and CORONA_FAVICON in first_link['href']:
        # The chapter text is on the page itself.
        story = chapter_soup.find('div', class_=CORONA_STORY_CLASS)
    elif chapter_soup.find('link', title=ZIRU_FEED_TITLE) is not None:
        # This page only links to the chapter; follow that link first.
        read_link = chapter_soup.find('a', string='Read Chapter Here', href=True)
        if read_link is None:
            return None
        story = _fetch_soup(read_link['href']).find('div', class_=ZIRU_STORY_CLASS)
    else:
        return None
    return story.get_text() if story is not None else None


# NOTE(review): the countdown stops after page 2, exactly as the original
# `while pg > 1` did, so page 1 is never fetched -- confirm this is intended.
for pg in range(FIRST_PAGE, 1, -1):
    listing = _fetch_soup(
        '{}/series/{}'.format(BASE_URL, SERIES), params={'pg': pg}
    )
    table = listing.find(id='myTable')
    if table is None:
        print('No element with id "myTable" on page ' + str(pg))
        continue

    for a in table.find_all('a', href=True, title=True):
        if 'extnu' not in a['href']:
            continue

        # Drop only the leading slashes of the href; a trailing "/" belongs
        # to the URL and must be kept.
        chapter_path = a['href'].lstrip('/')
        print(a['title'])
        print(chapter_path)

        story_text = _extract_story(_fetch_soup('http://' + chapter_path))
        if story_text is None:
            print('Could not extract chapter text from ' + chapter_path)
            continue

        with open(_safe_filename(a['title']), 'w', encoding='utf-8') as f:
            f.write(story_text)