Merge branch 'mikf:master' into Enhancement_Filter_Event

BishopRed90 · web-flow · commit 715d42016404 · 2025-04-12T13:22:34.000-05:00
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -401,6 +401,7 @@ Default
         ``soundgasm``,
         ``urlgalleries``,
         ``vk``,
+        ``webtoons``,
         ``weebcentral``,
         ``xfolio``,
         ``zerochan``
@@ -5284,6 +5285,34 @@ Description
     Note: This requires 1 additional HTTP request per submission.
 
 
+extractor.webtoons.quality
+--------------------------
+Type
+    * ``integer``
+    * ``string``
+    * ``object`` (`ext` -> `type`)
+
+Default
+    ``"original"``
+Example
+    * ``90``
+    * ``"q50"``
+    * ``{"jpg": "q80", "jpeg": "q80", "png": false}``
+Description
+    Controls the quality of downloaded files by modifying URLs' ``type`` parameter.
+
+    ``"original"``
+        Download minimally compressed versions of JPG files
+    any ``integer``
+        Use ``"q<VALUE>"`` as ``type`` parameter for JPEG files
+    any ``string``
+        Use this value as ``type`` parameter for JPEG files
+    any ``object``
+        | Use the given values as ``type`` parameter for URLs with the specified extensions
+        | - Set a value to ``false`` to completely remove these extensions' ``type`` parameter
+        | - Omit an extension to leave its URLs unchanged
+
+
 extractor.weibo.gifs
 --------------------
 Type
@@ -6611,6 +6640,17 @@ Description
     Note: `metadata.extension`_ is ignored if this option is set.
 
 
+metadata.metadata-path
+----------------------
+Type
+    ``string``
+Example
+    ``"_meta_path"``
+Description
+    Insert the path of generated files
+    into metadata dictionaries under the given name.
+
+
 metadata.event
 --------------
 Type
diff --git a/docs/formatting.md b/docs/formatting.md
@@ -176,6 +176,18 @@ Conversion specifiers allow to *convert* the value to a different form or type.
     <td></td>
     <td></td>
 </tr>
+<tr>
+    <td align="center"><code>i</code></td>
+    <td>Convert value to <a href="https://docs.python.org/3/library/functions.html#int"><code>int</code></a></td>
+    <td></td>
+    <td></td>
+</tr>
+<tr>
+    <td align="center"><code>f</code></td>
+    <td>Convert value to <a href="https://docs.python.org/3/library/functions.html#float"><code>float</code></a></td>
+    <td></td>
+    <td></td>
+</tr>
 </tbody>
 </table>
 
@@ -233,7 +245,7 @@ Format specifiers can be used for advanced formatting by using the options provi
     <td><code>Foo&nbsp;Bar</code></td>
 </tr>
 <tr>
-    <td><code>{foo:L6/&nbsp;.../}</code></td>
+    <td><code>{foo:X6/&nbsp;.../}</code></td>
     <td><code>Fo&nbsp;...</code></td>
 </tr>
 <tr>
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
@@ -732,11 +732,13 @@
             "api-key" : null,
             "metadata": false
         },
-        "weebcentral":
+        "webtoons":
         {
-            "sleep-request": "0.5-1.5"
+            "sleep-request": "0.5-1.5",
+
+            "quality": "original"
         },
-        "xfolio":
+        "weebcentral":
         {
             "sleep-request": "0.5-1.5"
         },
@@ -751,6 +753,10 @@
             "retweets" : false,
             "videos"   : true
         },
+        "xfolio":
+        {
+            "sleep-request": "0.5-1.5"
+        },
         "ytdl":
         {
             "cmdline-args": null,
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
@@ -49,7 +49,10 @@ def extract_message_text(self, message):
                     text_content.append(field.get("name", ""))
                     text_content.append(field.get("value", ""))
 
-                text_content.append(embed.get("footer", {}).get("text", ""))
+                try:
+                    text_content.append(embed["footer"]["text"])
+                except Exception:
+                    pass
 
         if message.get("poll"):
             text_content.append(message["poll"]["question"]["text"])
@@ -224,10 +227,12 @@ def parse_server(self, server):
         return self.server_metadata
 
     def build_server_and_channels(self, server_id):
-        server = self.api.get_server(server_id)
-        self.parse_server(server)
+        self.parse_server(self.api.get_server(server_id))
 
-        for channel in self.api.get_server_channels(server_id):
+        for channel in sorted(
+            self.api.get_server_channels(server_id),
+            key=lambda ch: ch["type"] != 4
+        ):
             self.parse_channel(channel)
 
 
@@ -353,7 +358,8 @@ def _method(_):
                 "limit": MESSAGES_BATCH,
                 "before": before
             })
-            before = messages[-1]["id"]
+            if messages:
+                before = messages[-1]["id"]
             return messages
 
         return self._pagination(_method, MESSAGES_BATCH)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
@@ -114,11 +114,12 @@ def _file_url(post):
             md5 = post["md5"]
             path = "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
             post["_fallback"] = GelbooruBase._video_fallback(path)
-            url = "https://img3.gelbooru.com" + path
+            url = "https://img4.gelbooru.com" + path
         return url
 
     @staticmethod
     def _video_fallback(path):
+        yield "https://img3.gelbooru.com" + path
         yield "https://img2.gelbooru.com" + path
         yield "https://img1.gelbooru.com" + path
 
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
@@ -15,7 +15,7 @@
 import itertools
 import hashlib
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
 USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
 
 
@@ -531,7 +531,7 @@ def items(self):
 class PixivWorkExtractor(PixivExtractor):
     """Extractor for a single pixiv work/illustration"""
     subcategory = "work"
-    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
+    pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?ph?ixiv\.net"
                r"/(?:(?:en/)?artworks/"
                r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
                r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
@@ -20,6 +20,7 @@ class WebtoonsBase():
     category = "webtoons"
     root = "https://www.webtoons.com"
     cookies_domain = ".webtoons.com"
+    request_interval = (0.5, 1.5)
 
     def setup_agegate_cookies(self):
         self.cookies_update({
@@ -98,11 +99,34 @@ def metadata(self, page):
         }
 
     def images(self, page):
-        return [
-            (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None)
-            for url in text.extract_iter(
-                page, 'class="_images" data-url="', '"')
-        ]
+        quality = self.config("quality")
+        if quality is None or quality == "original":
+            quality = {"jpg": False, "jpeg": False, "webp": False}
+        elif not quality:
+            quality = None
+        elif isinstance(quality, str):
+            quality = {"jpg": quality, "jpeg": quality}
+        elif isinstance(quality, int):
+            quality = "q" + str(quality)
+            quality = {"jpg": quality, "jpeg": quality}
+        elif not isinstance(quality, dict):
+            quality = None
+
+        results = []
+        for url in text.extract_iter(
+                page, 'class="_images" data-url="', '"'):
+
+            if quality is not None:
+                path, _, query = url.rpartition("?")
+                type = quality.get(path.rpartition(".")[2].lower())
+                if type is False:
+                    url = path
+                elif type:
+                    url = "{}?type={}".format(path, type)
+
+            url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
+            results.append((url, None))
+        return results
 
 
 class WebtoonsComicExtractor(WebtoonsBase, Extractor):
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
@@ -495,6 +495,8 @@ def __getitem__(key):
     "s": str,
     "r": repr,
     "a": ascii,
+    "i": int,
+    "f": float,
 }
 _FORMAT_SPECIFIERS = {
     "?": _parse_optional,
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
@@ -108,6 +108,7 @@ def __init__(self, job, options):
         self.omode = options.get("open", omode)
         self.encoding = options.get("encoding", "utf-8")
         self.skip = options.get("skip", False)
+        self.meta_path = options.get("metadata-path")
 
     def run(self, pathfmt):
         archive = self.archive
@@ -120,6 +121,9 @@ def run(self, pathfmt):
             directory = self._directory(pathfmt)
         path = directory + self._filename(pathfmt)
 
+        if self.meta_path is not None:
+            pathfmt.kwdict[self.meta_path] = path
+
         if self.skip and os.path.exists(path):
             return
 
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
@@ -156,12 +156,7 @@ def convert_from_zip(self, pathfmt):
                     return self.log.debug("", exc_info=exc)
 
             if self.convert(pathfmt, tempdir):
-                if self.delete:
-                    pathfmt.delete = True
-                elif pathfmt.extension != "zip":
-                    self.log.info(pathfmt.filename)
-                    pathfmt.set_extension("zip")
-                    pathfmt.build_path()
+                pathfmt.delete = self.delete
 
     def convert_from_files(self, pathfmt):
         if not self._convert_files:
diff --git a/test/results/deviantart.py b/test/results/deviantart.py
@@ -903,7 +903,7 @@
 
 {
     "#url"     : "https://www.deviantart.com/justatest235723/art/1133021832",
-    "#comment" : "mutliple images (#6653)",
+    "#comment" : "multiple images (#6653)",
     "#category": ("", "deviantart", "deviation"),
     "#class"   : deviantart.DeviantartDeviationExtractor,
     "#archive" : False,
diff --git a/test/results/gelbooru.py b/test/results/gelbooru.py
@@ -36,7 +36,7 @@
     "#pattern" : r"https://img\d\.gelbooru\.com/images/../../[0-9a-f]{32}\.jpg",
     "#range"   : "196-204",
     "#count"   : 9,
-    "#sha1_url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
+    "#sha1_url": "75326d788049459aff46c537fe53d6ea31a2305e",
 },
 
 {
@@ -67,11 +67,11 @@
     "#category": ("booru", "gelbooru", "favorite"),
     "#class"   : gelbooru.GelbooruFavoriteExtractor,
     "#urls"    : (
-        "https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
-        "https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
-        "https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
-        "https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
-        "https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
+        "https://img4.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
+        "https://img4.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
+        "https://img4.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
+        "https://img4.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
+        "https://img4.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
     ),
 },
 
@@ -81,11 +81,11 @@
     "#class"   : gelbooru.GelbooruFavoriteExtractor,
     "#options" : {"order-posts": "reverse"},
     "#urls"    : (
-        "https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
-        "https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
-        "https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
-        "https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
-        "https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
+        "https://img4.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
+        "https://img4.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
+        "https://img4.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
+        "https://img4.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
+        "https://img4.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
     ),
 },
 
diff --git a/test/results/pixiv.py b/test/results/pixiv.py
@@ -101,6 +101,18 @@
     "#class"   : pixiv.PixivArtworksExtractor,
 },
 
+{
+    "#url"     : "https://www.phixiv.net/member_illust.php?id=173530",
+    "#category": ("", "pixiv", "artworks"),
+    "#class"   : pixiv.PixivArtworksExtractor,
+},
+
+{
+    "#url"     : "https://phixiv.net/en/users/56514424/artworks",
+    "#category": ("", "pixiv", "artworks"),
+    "#class"   : pixiv.PixivArtworksExtractor,
+},
+
 {
     "#url"     : "https://www.pixiv.net/en/users/173530/avatar",
     "#category": ("", "pixiv", "avatar"),
@@ -341,6 +353,18 @@
     "#class"   : pixiv.PixivWorkExtractor,
 },
 
+{
+    "#url"     : "https://www.phixiv.net/en/artworks/966412",
+    "#category": ("", "pixiv", "work"),
+    "#class"   : pixiv.PixivWorkExtractor,
+},
+
+{
+    "#url"     : "https://phixiv.net/member_illust.php?mode=medium&illust_id=966412",
+    "#category": ("", "pixiv", "work"),
+    "#class"   : pixiv.PixivWorkExtractor,
+},
+
 {
     "#url"     : "https://www.pixiv.net/en/artworks/unlisted/eE3fTYaROT9IsZmep386",
     "#class"   : pixiv.PixivUnlistedExtractor,
diff --git a/test/results/reddit.py b/test/results/reddit.py
@@ -235,6 +235,14 @@
     ),
 },
 
+{
+    "#url"     : "https://old.reddit.com/r/redgifs/comments/gfvw9v/redgifs_links_on_reddit_see_comment/",
+    "#comment" : "redgifs embed",
+    "#category": ("", "reddit", "submission"),
+    "#class"   : reddit.RedditSubmissionExtractor,
+    "#urls"    : "https://redgifs.com/watch/foolishforkedabyssiniancat",
+},
+
 {
     "#url"     : "https://old.reddit.com/r/lavaporn/comments/2a00np/",
     "#category": ("", "reddit", "submission"),
diff --git a/test/results/webtoons.py b/test/results/webtoons.py
diff --git a/test/test_formatter.py b/test/test_formatter.py
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py

Original file line number	Diff line number	Diff line change
`@@ -495,6 +495,8 @@ def __getitem__(key):`
`495`	`495`	`"s": str,`
`496`	`496`	`"r": repr,`
`497`	`497`	`"a": ascii,`
	`498`	`+ "i": int,`
	`499`	`+ "f": float,`
`498`	`500`	`}`
`499`	`501`	`_FORMAT_SPECIFIERS = {`
`500`	`502`	`"?": _parse_optional,`
Original file line number	Diff line number	Diff line change
`@@ -903,7 +903,7 @@`
`903`	`903`
`904`	`904`	`{`
`905`	`905`	`"#url" : "https://www.deviantart.com/justatest235723/art/1133021832",`
`906`		`- "#comment" : "mutliple images (#6653)",`
	`906`	`+ "#comment" : "multiple images (#6653)",`
`907`	`907`	`"#category": ("", "deviantart", "deviation"),`
`908`	`908`	`"#class" : deviantart.DeviantartDeviationExtractor,`
`909`	`909`	`"#archive" : False,`