Skip to content

Commit 715d420

Browse files
authored
Merge branch 'mikf:master' into Enhancement_Filter_Event
2 parents 5ab1355 + 81ff021 commit 715d420

17 files changed

+200
-40
lines changed

Diff for: docs/configuration.rst

+40
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ Default
401401
``soundgasm``,
402402
``urlgalleries``,
403403
``vk``,
404+
``webtoons``,
404405
``weebcentral``,
405406
``xfolio``,
406407
``zerochan``
@@ -5284,6 +5285,34 @@ Description
52845285
Note: This requires 1 additional HTTP request per submission.
52855286

52865287

5288+
extractor.webtoons.quality
5289+
--------------------------
5290+
Type
5291+
* ``integer``
5292+
* ``string``
5293+
* ``object`` (`ext` -> `type`)
5294+
5295+
Default
5296+
``"original"``
5297+
Example
5298+
* ``90``
5299+
* ``"q50"``
5300+
* ``{"jpg": "q80", "jpeg": "q80", "png": false}``
5301+
Description
5302+
Controls the quality of downloaded files by modifying URLs' ``type`` parameter.
5303+
5304+
``"original"``
5305+
Download minimally compressed versions of JPG files
5306+
any ``integer``
5307+
Use ``"q<VALUE>"`` as ``type`` parameter for JPEG files
5308+
any ``string``
5309+
Use this value as ``type`` parameter for JPEG files
5310+
any ``object``
5311+
| Use the given values as ``type`` parameter for URLs with the specified extensions
5312+
| - Set a value to ``false`` to completely remove these extensions' ``type`` parameter
5313+
| - Omit an extension to leave its URLs unchanged
5314+
5315+
52875316
extractor.weibo.gifs
52885317
--------------------
52895318
Type
@@ -6611,6 +6640,17 @@ Description
66116640
Note: `metadata.extension`_ is ignored if this option is set.
66126641

66136642

6643+
metadata.metadata-path
6644+
----------------------
6645+
Type
6646+
``string``
6647+
Example
6648+
``"_meta_path"``
6649+
Description
6650+
Insert the path of generated files
6651+
into metadata dictionaries under the given name.
6652+
6653+
66146654
metadata.event
66156655
--------------
66166656
Type

Diff for: docs/formatting.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,18 @@ Conversion specifiers allow to *convert* the value to a different form or type.
176176
<td></td>
177177
<td></td>
178178
</tr>
179+
<tr>
180+
<td align="center"><code>i</code></td>
181+
<td>Convert value to <a href="https://docs.python.org/3/library/functions.html#int"><code>int</code></a></td>
182+
<td></td>
183+
<td></td>
184+
</tr>
185+
<tr>
186+
<td align="center"><code>f</code></td>
187+
<td>Convert value to <a href="https://docs.python.org/3/library/functions.html#float"><code>float</code></a></td>
188+
<td></td>
189+
<td></td>
190+
</tr>
179191
</tbody>
180192
</table>
181193

@@ -233,7 +245,7 @@ Format specifiers can be used for advanced formatting by using the options provi
233245
<td><code>Foo&nbsp;Bar</code></td>
234246
</tr>
235247
<tr>
236-
<td><code>{foo:L6/&nbsp;.../}</code></td>
248+
<td><code>{foo:X6/&nbsp;.../}</code></td>
237249
<td><code>Fo&nbsp;...</code></td>
238250
</tr>
239251
<tr>

Diff for: docs/gallery-dl.conf

+9-3
Original file line numberDiff line numberDiff line change
@@ -732,11 +732,13 @@
732732
"api-key" : null,
733733
"metadata": false
734734
},
735-
"weebcentral":
735+
"webtoons":
736736
{
737-
"sleep-request": "0.5-1.5"
737+
"sleep-request": "0.5-1.5",
738+
739+
"quality": "original"
738740
},
739-
"xfolio":
741+
"weebcentral":
740742
{
741743
"sleep-request": "0.5-1.5"
742744
},
@@ -751,6 +753,10 @@
751753
"retweets" : false,
752754
"videos" : true
753755
},
756+
"xfolio":
757+
{
758+
"sleep-request": "0.5-1.5"
759+
},
754760
"ytdl":
755761
{
756762
"cmdline-args": null,

Diff for: gallery_dl/extractor/discord.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,10 @@ def extract_message_text(self, message):
4949
text_content.append(field.get("name", ""))
5050
text_content.append(field.get("value", ""))
5151

52-
text_content.append(embed.get("footer", {}).get("text", ""))
52+
try:
53+
text_content.append(embed["footer"]["text"])
54+
except Exception:
55+
pass
5356

5457
if message.get("poll"):
5558
text_content.append(message["poll"]["question"]["text"])
@@ -224,10 +227,12 @@ def parse_server(self, server):
224227
return self.server_metadata
225228

226229
def build_server_and_channels(self, server_id):
227-
server = self.api.get_server(server_id)
228-
self.parse_server(server)
230+
self.parse_server(self.api.get_server(server_id))
229231

230-
for channel in self.api.get_server_channels(server_id):
232+
for channel in sorted(
233+
self.api.get_server_channels(server_id),
234+
key=lambda ch: ch["type"] != 4
235+
):
231236
self.parse_channel(channel)
232237

233238

@@ -353,7 +358,8 @@ def _method(_):
353358
"limit": MESSAGES_BATCH,
354359
"before": before
355360
})
356-
before = messages[-1]["id"]
361+
if messages:
362+
before = messages[-1]["id"]
357363
return messages
358364

359365
return self._pagination(_method, MESSAGES_BATCH)

Diff for: gallery_dl/extractor/gelbooru.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,12 @@ def _file_url(post):
114114
md5 = post["md5"]
115115
path = "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
116116
post["_fallback"] = GelbooruBase._video_fallback(path)
117-
url = "https://img3.gelbooru.com" + path
117+
url = "https://img4.gelbooru.com" + path
118118
return url
119119

120120
@staticmethod
121121
def _video_fallback(path):
122+
yield "https://img3.gelbooru.com" + path
122123
yield "https://img2.gelbooru.com" + path
123124
yield "https://img1.gelbooru.com" + path
124125

Diff for: gallery_dl/extractor/pixiv.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import itertools
1616
import hashlib
1717

18-
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
18+
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
1919
USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
2020

2121

@@ -531,7 +531,7 @@ def items(self):
531531
class PixivWorkExtractor(PixivExtractor):
532532
"""Extractor for a single pixiv work/illustration"""
533533
subcategory = "work"
534-
pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
534+
pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?ph?ixiv\.net"
535535
r"/(?:(?:en/)?artworks/"
536536
r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
537537
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"

Diff for: gallery_dl/extractor/webtoons.py

+29-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class WebtoonsBase():
2020
category = "webtoons"
2121
root = "https://www.webtoons.com"
2222
cookies_domain = ".webtoons.com"
23+
request_interval = (0.5, 1.5)
2324

2425
def setup_agegate_cookies(self):
2526
self.cookies_update({
@@ -98,11 +99,34 @@ def metadata(self, page):
9899
}
99100

100101
def images(self, page):
101-
return [
102-
(url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None)
103-
for url in text.extract_iter(
104-
page, 'class="_images" data-url="', '"')
105-
]
102+
quality = self.config("quality")
103+
if quality is None or quality == "original":
104+
quality = {"jpg": False, "jpeg": False, "webp": False}
105+
elif not quality:
106+
quality = None
107+
elif isinstance(quality, str):
108+
quality = {"jpg": quality, "jpeg": quality}
109+
elif isinstance(quality, int):
110+
quality = "q" + str(quality)
111+
quality = {"jpg": quality, "jpeg": quality}
112+
elif not isinstance(quality, dict):
113+
quality = None
114+
115+
results = []
116+
for url in text.extract_iter(
117+
page, 'class="_images" data-url="', '"'):
118+
119+
if quality is not None:
120+
path, _, query = url.rpartition("?")
121+
type = quality.get(path.rpartition(".")[2].lower())
122+
if type is False:
123+
url = path
124+
elif type:
125+
url = "{}?type={}".format(path, type)
126+
127+
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
128+
results.append((url, None))
129+
return results
106130

107131

108132
class WebtoonsComicExtractor(WebtoonsBase, Extractor):

Diff for: gallery_dl/formatter.py

+2
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,8 @@ def __getitem__(key):
495495
"s": str,
496496
"r": repr,
497497
"a": ascii,
498+
"i": int,
499+
"f": float,
498500
}
499501
_FORMAT_SPECIFIERS = {
500502
"?": _parse_optional,

Diff for: gallery_dl/postprocessor/metadata.py

+4
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def __init__(self, job, options):
108108
self.omode = options.get("open", omode)
109109
self.encoding = options.get("encoding", "utf-8")
110110
self.skip = options.get("skip", False)
111+
self.meta_path = options.get("metadata-path")
111112

112113
def run(self, pathfmt):
113114
archive = self.archive
@@ -120,6 +121,9 @@ def run(self, pathfmt):
120121
directory = self._directory(pathfmt)
121122
path = directory + self._filename(pathfmt)
122123

124+
if self.meta_path is not None:
125+
pathfmt.kwdict[self.meta_path] = path
126+
123127
if self.skip and os.path.exists(path):
124128
return
125129

Diff for: gallery_dl/postprocessor/ugoira.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,7 @@ def convert_from_zip(self, pathfmt):
156156
return self.log.debug("", exc_info=exc)
157157

158158
if self.convert(pathfmt, tempdir):
159-
if self.delete:
160-
pathfmt.delete = True
161-
elif pathfmt.extension != "zip":
162-
self.log.info(pathfmt.filename)
163-
pathfmt.set_extension("zip")
164-
pathfmt.build_path()
159+
pathfmt.delete = self.delete
165160

166161
def convert_from_files(self, pathfmt):
167162
if not self._convert_files:

Diff for: test/results/deviantart.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@
903903

904904
{
905905
"#url" : "https://www.deviantart.com/justatest235723/art/1133021832",
906-
"#comment" : "mutliple images (#6653)",
906+
"#comment" : "multiple images (#6653)",
907907
"#category": ("", "deviantart", "deviation"),
908908
"#class" : deviantart.DeviantartDeviationExtractor,
909909
"#archive" : False,

Diff for: test/results/gelbooru.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"#pattern" : r"https://img\d\.gelbooru\.com/images/../../[0-9a-f]{32}\.jpg",
3737
"#range" : "196-204",
3838
"#count" : 9,
39-
"#sha1_url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
39+
"#sha1_url": "75326d788049459aff46c537fe53d6ea31a2305e",
4040
},
4141

4242
{
@@ -67,11 +67,11 @@
6767
"#category": ("booru", "gelbooru", "favorite"),
6868
"#class" : gelbooru.GelbooruFavoriteExtractor,
6969
"#urls" : (
70-
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
71-
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
72-
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
73-
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
74-
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
70+
"https://img4.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
71+
"https://img4.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
72+
"https://img4.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
73+
"https://img4.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
74+
"https://img4.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
7575
),
7676
},
7777

@@ -81,11 +81,11 @@
8181
"#class" : gelbooru.GelbooruFavoriteExtractor,
8282
"#options" : {"order-posts": "reverse"},
8383
"#urls" : (
84-
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
85-
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
86-
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
87-
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
88-
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
84+
"https://img4.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
85+
"https://img4.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
86+
"https://img4.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
87+
"https://img4.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
88+
"https://img4.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
8989
),
9090
},
9191

Diff for: test/results/pixiv.py

+24
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,18 @@
101101
"#class" : pixiv.PixivArtworksExtractor,
102102
},
103103

104+
{
105+
"#url" : "https://www.phixiv.net/member_illust.php?id=173530",
106+
"#category": ("", "pixiv", "artworks"),
107+
"#class" : pixiv.PixivArtworksExtractor,
108+
},
109+
110+
{
111+
"#url" : "https://phixiv.net/en/users/56514424/artworks",
112+
"#category": ("", "pixiv", "artworks"),
113+
"#class" : pixiv.PixivArtworksExtractor,
114+
},
115+
104116
{
105117
"#url" : "https://www.pixiv.net/en/users/173530/avatar",
106118
"#category": ("", "pixiv", "avatar"),
@@ -341,6 +353,18 @@
341353
"#class" : pixiv.PixivWorkExtractor,
342354
},
343355

356+
{
357+
"#url" : "https://www.phixiv.net/en/artworks/966412",
358+
"#category": ("", "pixiv", "work"),
359+
"#class" : pixiv.PixivWorkExtractor,
360+
},
361+
362+
{
363+
"#url" : "https://phixiv.net/member_illust.php?mode=medium&illust_id=966412",
364+
"#category": ("", "pixiv", "work"),
365+
"#class" : pixiv.PixivWorkExtractor,
366+
},
367+
344368
{
345369
"#url" : "https://www.pixiv.net/en/artworks/unlisted/eE3fTYaROT9IsZmep386",
346370
"#class" : pixiv.PixivUnlistedExtractor,

Diff for: test/results/reddit.py

+8
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,14 @@
235235
),
236236
},
237237

238+
{
239+
"#url" : "https://old.reddit.com/r/redgifs/comments/gfvw9v/redgifs_links_on_reddit_see_comment/",
240+
"#comment" : "redgifs embed",
241+
"#category": ("", "reddit", "submission"),
242+
"#class" : reddit.RedditSubmissionExtractor,
243+
"#urls" : "https://redgifs.com/watch/foolishforkedabyssiniancat",
244+
},
245+
238246
{
239247
"#url" : "https://old.reddit.com/r/lavaporn/comments/2a00np/",
240248
"#category": ("", "reddit", "submission"),

0 commit comments

Comments
 (0)