From fd3776cc4aaa05a14ef496a671a65c22920ad171 Mon Sep 17 00:00:00 2001
From: Dan Dembinski <Dan.Dembinski@Gmail.com>
Date: Fri, 19 Jun 2020 02:00:45 -0400
Subject: [PATCH] Initial Commit. Sort of works, but the parsing is rough. Only
 works for two translation sites right now, and the one site seems to have
 changed its format at some point, which breaks the loop.

---
 main.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 main.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..600206a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,45 @@
+import requests
+import pprint as pp
+from bs4 import BeautifulSoup as bs
+
+# Save each chapter of one NovelUpdates series that is hosted on a supported site as "<title>.txt".
+SERIES_URL = 'https://www.novelupdates.com/series/lazy-dungeon-master'
+ZIRU_BODY = 'elementor-element elementor-element-7ba99198 elementor-widget elementor-widget-theme-post-content'
+
+
+def fetch(url):
+    """Download *url* (30 s timeout) and return it parsed with html.parser."""
+    return bs(requests.get(url, timeout=30).content, 'html.parser')
+
+
+def find_story(soup):
+    """Return the chapter-body tag of a supported site's page, or None if unrecognised."""
+    icon = soup.find('link', href=True)
+    if icon is not None and 'https://coronatranslation.blogspot.com/favicon.ico' in icon['href']:
+        return soup.find('div', class_='post-body entry-content float-container')
+    if soup.find('link', title="Ziru's Musings » Feed") is not None:
+        # Ziru's landing post only links to the page that holds the chapter text.
+        link = soup.find('a', string='Read Chapter Here', href=True)
+        return link and fetch(link['href']).find('div', class_=ZIRU_BODY)
+    return None
+
+
+for pg in range(27, 0, -1):  # walk the release pages from 27 down to 1 inclusive
+    table = fetch(SERIES_URL + '?pg=' + str(pg)).find(id='myTable')
+    if table is None:
+        print('No release table on page ' + str(pg))
+        continue
+    for a in table.find_all('a', href=True, title=True):
+        if 'extnu' not in a['href']:
+            continue
+        # hrefs appear to be protocol-relative ("//host/..."): drop only the leading slashes.
+        target = a['href'].lstrip('/')
+        print(a['title'], target, sep='\n')
+        story = find_story(fetch('http://' + target))
+        if story is None:
+            print('Unsupported or changed page layout: ' + target)
+            continue
+        # Characters that are invalid in file names are replaced with "_".
+        name = ''.join('_' if c in '\\/:*?"<>|' else c for c in a['title'])
+        with open(name + '.txt', 'w', encoding='utf-8') as f:
+            f.write(story.get_text())
\ No newline at end of file