-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds support for Mangasee (mangaseeonline.us)
- Loading branch information
Showing
4 changed files
with
258 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
from bs4 import BeautifulSoup | ||
from cum import config, exceptions | ||
from cum.scrapers.base import BaseChapter, BaseSeries, download_pool | ||
from functools import partial | ||
import concurrent.futures | ||
import json | ||
import re | ||
import requests | ||
import traceback | ||
|
||
|
||
class MangaseeSeries(BaseSeries):
    """Series scraper for Mangasee (mangaseeonline.us)."""

    url_re = re.compile(r'https?://mangaseeonline\.us/manga/.+')

    def __init__(self, url, **kwargs):
        super().__init__(url, **kwargs)
        spage = requests.get(url)
        self.soup = BeautifulSoup(spage.text, config.get().html_parser)
        self.chapters = self.get_chapters()

    def get_chapters(self):
        """Parse the series page and return a list of MangaseeChapter.

        Raises:
            exceptions.ScrapingError: if the chapter rows are missing or a
                row's title attribute does not match the expected format.
        """
        try:
            rows = self.soup.find_all("a", class_="list-group-item")
        except AttributeError:
            raise exceptions.ScrapingError()
        chapters = []
        for row in rows:
            # Chapter number is embedded in the row's title attribute,
            # e.g. 'Read <series> Chapter 12.5 For Free Online'.
            match = re.match(
                r"Read .+ Chapter ([0-9\.]+) For Free Online", row["title"])
            if not match:
                # Guard against site markup changes instead of crashing
                # with a raw AttributeError on .groups().
                raise exceptions.ScrapingError()
            chap_num = match.groups()[0]
            chap_url = "https://mangaseeonline.us" + row["href"]
            chap_name = row.find_all("span")[0].text
            chap_date = row.find_all("time")[0].text
            result = MangaseeChapter(name=self.name,
                                     alias=self.alias,
                                     chapter=chap_num,
                                     url=chap_url,
                                     title=chap_name,
                                     groups=[],
                                     upload_date=chap_date)
            chapters.append(result)
        return chapters

    @property
    def name(self):
        """Series name parsed from the page <title> element.

        Raises:
            exceptions.ScrapingError: if the title does not match the
                expected Mangasee title format.
        """
        try:
            return re.match(r"Read (.+) Man[a-z]+ For Free \| MangaSee",
                            self.soup.find_all("title")[0].text).groups()[0]
        except AttributeError:
            # Non-matching title: raise the project's scraping error
            # rather than leaking the AttributeError (debug traceback
            # print removed).
            raise exceptions.ScrapingError()
|
||
|
||
class MangaseeChapter(BaseChapter):
    """Chapter scraper for Mangasee (mangaseeonline.us)."""

    url_re = re.compile((r'https?://mangaseeonline\.us/'
                         r'read-online/.+-chapter-[0-9\.]+-page-[0-9]+\.html'))
    upload_date = None
    uses_pages = True

    def download(self):
        """Download every page of the chapter and pack them into a zip.

        Raises:
            exceptions.ScrapingError: if the inline page-list script is
                missing, or a page cannot be fetched after three attempts.
            ValueError: if a page request returns a non-200 status code.
        """
        if not getattr(self, "cpage", None):
            self.cpage = requests.get(self.url)
        if not getattr(self, "soup", None):
            self.soup = BeautifulSoup(self.cpage.text,
                                      config.get().html_parser)

        # The reader page embeds its page list in an inline script that
        # assigns the ChapterArr and PageArr JS objects.
        image_list = None
        for script in self.soup.find_all("script"):
            if re.match(r"\n\tChapterArr=.+", script.text):
                image_list = script.text
                break
        if image_list is None:
            # Originally this fell through with image_list unbound and
            # crashed with a NameError; fail with a scraping error instead.
            raise exceptions.ScrapingError()

        # Strip the JS assignment syntax and splice both objects into a
        # JSON array; element [1] (PageArr) maps page keys to image URLs.
        image_list = re.sub("\n\tChapterArr=", "", image_list)
        image_list = re.sub(";\n\t?", "", image_list)
        image_list = re.sub("PageArr=", ",", image_list)
        image_list = "[" + image_list + "]"
        image_list = json.loads(image_list)[1]
        pages = []
        for image in image_list:
            if image != "CurPage":
                # Blogspot-hosted images are reachable over HTTPS; upgrade
                # the scheme to avoid plain-HTTP downloads.
                if re.match(".+blogspot.+", image_list[image]):
                    image_list[image] = image_list[image].\
                        replace("http://", "https://")
                pages.append(image_list[image])

        futures = []
        files = [None] * len(pages)
        with self.progress_bar(pages) as bar:
            for i, page in enumerate(pages):
                r = None
                # Retry transient connection failures up to three times.
                for _ in range(3):
                    try:
                        r = requests.get(page, stream=True)
                        break
                    except requests.exceptions.ConnectionError:
                        continue
                if r is None:
                    # All retries failed; originally this crashed with an
                    # unbound-name NameError on r.status_code below.
                    raise exceptions.ScrapingError()
                if r.status_code != 200:
                    r.close()
                    raise ValueError
                fut = download_pool.submit(self.page_download_task, i, r)
                fut.add_done_callback(partial(self.page_download_finish,
                                              bar, files))
                futures.append(fut)
            concurrent.futures.wait(futures)
            self.create_zip(files)

    @staticmethod
    def from_url(url):
        """Resolve a reader-page URL to the matching chapter of its series.

        Returns the MangaseeChapter whose number matches the page's
        CurChapter marker, or None if no chapter matches.
        """
        cpage = requests.get(url)
        soup = BeautifulSoup(cpage.text, config.get().html_parser)
        chap_num = soup.find_all("span", class_="CurChapter")[0].text
        iname = soup.find_all("a", class_="list-link")[0]["href"]
        series = MangaseeSeries("https://mangaseeonline.us" + iname)
        for chapter in series.chapters:
            if chapter.chapter == str(chap_num):
                return chapter
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
from bs4 import BeautifulSoup | ||
from cum import config, exceptions | ||
from nose.tools import nottest | ||
from urllib.parse import urljoin | ||
import cumtest | ||
import os | ||
import requests | ||
import unittest | ||
import zipfile | ||
|
||
|
||
class TestMangasee(cumtest.CumTest):
    """Integration tests for the Mangasee scraper, run against the live site."""

    MANGASEE_URL = 'https://mangaseeonline.us/'

    def setUp(self):
        super().setUp()
        # Deferred import: the scraper reads config, which CumTest.setUp()
        # must initialize first.
        global mangasee
        from cum.scrapers import mangasee

    def tearDown(self):
        self.directory.cleanup()

    def get_five_latest_releases(self):
        """Return absolute URLs of the five most recent chapter releases."""
        response = requests.get(self.MANGASEE_URL)
        front_page = BeautifulSoup(response.text, config.get().html_parser)
        anchors = front_page.find_all("a", class_="latestSeries")
        urls = [urljoin(self.MANGASEE_URL, anchor.get("href"))
                for anchor in anchors]
        return urls[:5]

    @nottest
    def series_information_tester(self, data):
        """Assert that a scraped series matches the expected fixture data."""
        series = mangasee.MangaseeSeries(data['url'])
        self.assertEqual(series.name, data['name'])
        self.assertEqual(series.alias, data['alias'])
        self.assertEqual(series.url, data['url'])
        self.assertIs(series.directory, None)
        self.assertEqual(len(series.chapters), len(data['chapters']))
        for chapter in series.chapters:
            self.assertEqual(chapter.name, data['name'])
            self.assertEqual(chapter.alias, data['alias'])
            self.assertIn(chapter.chapter, data['chapters'])
            data['chapters'].remove(chapter.chapter)
            self.assertIs(chapter.directory, None)
        self.assertEqual(len(data['chapters']), 0)

    def test_chapter_download_latest(self):
        for release in self.get_five_latest_releases():
            try:
                chapter = mangasee.MangaseeChapter.from_url(release)
            except exceptions.ScrapingError as e:
                print('scraping error for {} - {}'.format(release, e))
            else:
                chapter.get(use_db=False)

    def test_chapter_filename_decimal(self):
        url = ("https://mangaseeonline.us/read-online/"
               "Citrus-S-A-B-U-R-O-Uta-chapter-20.5-page-1.html")
        chapter = mangasee.MangaseeChapter.from_url(url)
        expected = os.path.join(self.directory.name, 'Citrus SABURO Uta',
                                'Citrus SABURO Uta - c020 x5 [Unknown].zip')
        self.assertEqual(chapter.chapter, '20.5')
        self.assertEqual(chapter.filename, expected)

    def test_chapter_information_normal(self):
        url = ("https://mangaseeonline.us/read-online/"
               "Ramen-Daisuki-Koizumi-San-chapter-18-page-1.html")
        chapter = mangasee.MangaseeChapter.from_url(url)
        self.assertEqual(chapter.alias, 'ramen-daisuki-koizumi-san')
        self.assertTrue(chapter.available())
        self.assertEqual(chapter.chapter, '18')
        self.assertEqual(chapter.name, 'Ramen Daisuki Koizumi-san')
        self.assertEqual(chapter.title, 'Chapter 18')
        expected = os.path.join(
            self.directory.name,
            'Ramen Daisuki Koizumi-san',
            'Ramen Daisuki Koizumi-san - c018 [Unknown].zip')
        self.assertEqual(chapter.filename, expected)
        chapter.download()
        self.assertTrue(os.path.isfile(expected))
        with zipfile.ZipFile(expected) as chapter_zip:
            self.assertEqual(len(chapter_zip.infolist()), 8)

    def test_chapter_information_chapterzero(self):
        url = ("https://mangaseeonline.us/read-online/"
               "Inu-To-Hasami-Wa-Tsukaiyou-chapter-0-page-1.html")
        chapter = mangasee.MangaseeChapter.from_url(url)
        self.assertEqual(chapter.alias, 'inu-to-hasami-wa-tsukaiyou')
        self.assertEqual(chapter.chapter, '0')
        self.assertEqual(chapter.name, 'Inu to Hasami wa Tsukaiyou')
        self.assertEqual(chapter.title, 'Chapter 0')
        expected = os.path.join(
            self.directory.name, 'Inu to Hasami wa Tsukaiyou',
            'Inu to Hasami wa Tsukaiyou - c000 [Unknown].zip')
        self.assertEqual(chapter.filename, expected)
        chapter.download()
        self.assertTrue(os.path.isfile(expected))
        with zipfile.ZipFile(expected) as chapter_zip:
            self.assertEqual(len(chapter_zip.infolist()), 51)

    def test_chapter_unavailable(self):
        url = ("https://mangaseeonline.us/read-online/"
               "Oyasumi-Punpun-chapter-999-page-1.html")
        chapter = mangasee.MangaseeChapter(url=url)
        self.assertFalse(chapter.available())

    def test_series_oneword(self):
        data = {'alias': 'aria',
                'chapters': ['1', '2', '3', '4', '5', '6', '7', '8',
                             '9', '10', '10.5', '11', '12', '13', '14', '15',
                             '16', '17', '18', '19', '20', '21', '22', '23',
                             '24', '25', '26', '27', '28', '29', '30', '30.5',
                             '31', '32', '33', '34', '35', '35.5', '36',
                             '37', '37.5', '38', '39', '40', '41', '42', '43',
                             '44', '45', '45.5', '46', '47', '48', '49',
                             '50', '50.5', '51', '52', '53', '54', '55', '56',
                             '57', '57.5', '58', '59', '60', '60.5'],
                'name': 'Aria',
                'url': 'https://mangaseeonline.us/manga/Aria'}
        self.series_information_tester(data)

    def test_series_multiplewords(self):
        data = {'alias': 'prunus-girl',
                'chapters': ['1', '2', '3', '4', '5', '6', '7', '8',
                             '9', '10', '11', '12', '13', '14', '15',
                             '16', '17', '18', '19', '20', '21', '22',
                             '23', '24', '25', '26', '27', '28', '29', '30',
                             '31', '32', '32.5', '33', '34', '35', '36', '37',
                             '38', '39', '40', '41', '42', '43'],
                'name': 'Prunus Girl',
                'url': 'https://mangaseeonline.us/manga/Prunus-Girl'}
        self.series_information_tester(data)
|
||
|
||
# Allow running this test module directly with `python <file>`.
if __name__ == '__main__':
    unittest.main()