Initial commit. Sort of works, but the parsing is rough. It only works for two translation sites right now, and one of those sites seems to have changed its format at some point, which breaks the loop.
This commit is contained in:
45
main.py
Normal file
45
main.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import requests
|
||||
import pprint as pp
|
||||
from bs4 import BeautifulSoup as bs
|
||||
|
||||
# Scrape chapter text for one novelupdates.com series, walking the paginated
# release table from page `pg` downwards and saving each recognised chapter
# as "<link title>.txt" in the current working directory.
pg = 27

# Loop invariants: the series listing URL is rebuilt from these on every page.
baseURL = 'https://www.novelupdates.com/'
series = 'lazy-dungeon-master'


def _save_story(title, story):
    """Write the text of the parsed *story* element to '<title>.txt'.

    Returns True when the file was written and False when *story* is None,
    i.e. the expected content container was not found on the page.
    """
    if story is None:
        # Report and move on instead of raising AttributeError on None,
        # so one page with a different layout does not abort the whole run.
        print('No story content found for: ' + title)
        return False
    # The context manager closes the file even if get_text()/write() raises.
    with open(title + ".txt", "w", encoding="utf-8") as f:
        f.write(story.get_text())
    return True


def _download_chapter(title, followSoup):
    """Detect which translation site *followSoup* came from and save the chapter.

    Two layouts are recognised: the coronatranslation blogspot page (chapter
    text is on the followed page itself) and the "Ziru's Musings" page (the
    followed page links on to the chapter via a 'Read Chapter Here' anchor).
    Anything else falls through to the final branch and is only reported.
    """
    first_link = followSoup.find('link', href=True)
    if (first_link is not None
            and 'https://coronatranslation.blogspot.com/favicon.ico' in first_link['href']):
        story = followSoup.find('div', class_='post-body entry-content float-container')
        _save_story(title, story)
        return

    # find() returns None when the feed <link> is absent; subscripting that
    # None used to raise TypeError before the fallback branch could run.
    ziru_feed = followSoup.find('link', title="Ziru's Musings » Feed")
    if ziru_feed is not None:
        chapter_link = followSoup.find('a', text='Read Chapter Here', href=True)
        if chapter_link is None:
            print('No chapter link found for: ' + title)
            return
        storyPage = requests.get(chapter_link['href'])
        storySoup = bs(storyPage.content, 'html.parser')
        story = storySoup.find('div', class_='elementor-element elementor-element-7ba99198 elementor-widget elementor-widget-theme-post-content')
        _save_story(title, story)
        return

    print('fuck')


# NOTE(review): the condition stops before page 1, so the first listing page
# is never fetched - confirm whether that is intentional.
while pg > 1:
    # baseURL already ends with '/', so join with 'series/' to avoid '//series/'.
    page = requests.get(baseURL + 'series/' + series + '?pg=' + str(pg))
    soup = bs(page.content, 'html.parser')
    result = soup.find(id='myTable')

    if result is None:
        print('No release table found on page ' + str(pg))
    else:
        for a in result.find_all('a', href=True, title=True):
            # Only links whose href contains 'extnu' are followed.
            if 'extnu' not in a['href']:
                continue

            # str.strip takes a set of characters, so strip('//') == strip('/'):
            # it removes the leading and trailing slashes of the href.
            target = a['href'].strip('/')
            print(a['title'])
            print(target)

            followPage = requests.get('http://' + target)
            followSoup = bs(followPage.content, 'html.parser')
            _download_chapter(a['title'], followSoup)

    pg = pg - 1
|
||||
Reference in New Issue
Block a user