From fd3776cc4aaa05a14ef496a671a65c22920ad171 Mon Sep 17 00:00:00 2001
From: Dan Dembinski
Date: Fri, 19 Jun 2020 02:00:45 -0400
Subject: [PATCH] Initial Commit. Sort of works, but the parsing is rough. Only
 works for two translation sites right now, and the one site seems to have
 changed its format at some point which breaks the loop.

---
 main.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 main.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..600206a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,112 @@
+"""Download translated chapters of a NovelUpdates series as text files.
+
+For every release-table page of SERIES, follow each external ("extnu")
+chapter link and write the chapter body to "<link title>.txt" in the
+current directory. Only the two translation sites recognised by
+extract_story_text() are supported; anything else is reported and skipped.
+"""
+import pprint as pp
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup as bs
+
+BASE_URL = 'https://www.novelupdates.com/'
+SERIES = 'lazy-dungeon-master'
+LAST_PAGE = 27  # highest release-table page; pages are walked downwards
+REQUEST_TIMEOUT = 30  # seconds; requests.get() never times out by default
+
+CORONA_FAVICON = 'https://coronatranslation.blogspot.com/favicon.ico'
+CORONA_STORY_CLASS = 'post-body entry-content float-container'
+ZIRU_STORY_CLASS = ('elementor-element elementor-element-7ba99198 '
+                    'elementor-widget elementor-widget-theme-post-content')
+
+# Characters that are unsafe in file names; each is replaced by "_".
+UNSAFE_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})
+
+
+def fetch_soup(url, **kwargs):
+    """Fetch *url* and return its parsed HTML.
+
+    Extra keyword arguments are passed on to requests.get(). Raises
+    requests.RequestException on connection errors, timeouts and HTTP
+    error statuses.
+    """
+    response = requests.get(url, timeout=REQUEST_TIMEOUT, **kwargs)
+    response.raise_for_status()
+    return bs(response.content, 'html.parser')
+
+
+def is_ziru_title(title):
+    """Return True if a <link> title attribute value mentions Ziru."""
+    return title is not None and 'Ziru' in title
+
+
+def extract_story_text(soup):
+    """Return the chapter text reachable from *soup*, or None.
+
+    *soup* is the page an "extnu" link resolved to. Corona Translation
+    pages hold the chapter themselves; Ziru's Musings pages link to it
+    ("Read Chapter Here"), so that page is fetched too. None is returned
+    for unrecognised sites and when the expected markup is missing. May
+    raise requests.RequestException while fetching the linked page.
+    """
+    first_link = soup.find('link', href=True)
+    if first_link is not None and CORONA_FAVICON in first_link['href']:
+        story = soup.find('div', class_=CORONA_STORY_CLASS)
+    elif soup.find('link', title=is_ziru_title) is not None:
+        chapter_link = soup.find('a', string='Read Chapter Here', href=True)
+        if chapter_link is None:
+            return None
+        story_soup = fetch_soup(chapter_link['href'])
+        story = story_soup.find('div', class_=ZIRU_STORY_CLASS)
+    else:
+        return None
+    return None if story is None else story.get_text()
+
+
+def save_chapter(title, text):
+    """Write *text* to "<title>.txt" (UTF-8) in the current directory."""
+    filename = title.translate(UNSAFE_FILENAME_CHARS) + '.txt'
+    with open(filename, 'w', encoding='utf-8') as f:
+        f.write(text)
+
+
+def download_chapter(title, url):
+    """Fetch the chapter behind *url* and save it; report and skip failures."""
+    try:
+        story_text = extract_story_text(fetch_soup(url))
+    except requests.RequestException as err:
+        print('Skipping %s: %s' % (title, err))
+        return
+    if story_text is None:
+        print('Skipping %s: unsupported site or unexpected layout' % title)
+        return
+    save_chapter(title, story_text)
+
+
+def main():
+    """Download every chapter listed on pages LAST_PAGE down to 1."""
+    series_url = urljoin(BASE_URL, 'series/' + SERIES)
+    for pg in range(LAST_PAGE, 0, -1):
+        try:
+            table = fetch_soup(series_url, params={'pg': pg}).find(id='myTable')
+        except requests.RequestException as err:
+            print('Skipping page %d: %s' % (pg, err))
+            continue
+        if table is None:
+            print('Skipping page %d: release table not found' % pg)
+            continue
+        for a in table.find_all('a', href=True, title=True):
+            if 'extnu' not in a['href']:
+                continue
+            # The hrefs look protocol-relative ("//host/extnu/..."), so
+            # resolve them against BASE_URL rather than stripping slashes.
+            follow_url = urljoin(BASE_URL, a['href'])
+            print(a['title'])
+            print(follow_url)
+            download_chapter(a['title'], follow_url)
+
+
+if __name__ == '__main__':
+    main()