Skip to content

Commit

Permalink
[xfolio] add initial support (#5514, #6351, #6837)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 18, 2025
1 parent dc7b46b commit 438c616
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ Default
``urlgalleries``,
``vk``,
``weebcentral``,
``xfolio``,
``zerochan``
* ``"1.0-2.0"``
``flickr``,
Expand Down
6 changes: 5 additions & 1 deletion docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,10 @@
{
"sleep-request": "0.5-1.5"
},
"xfolio":
{
"sleep-request": "0.5-1.5"
},
"weibo":
{
"sleep-request": "1.0-2.0",
Expand Down Expand Up @@ -923,7 +927,7 @@
"config-file" : null,
"enabled" : true,
"format" : null,
"forward-cookies": false,
"forward-cookies": true,
"logging" : true,
"module" : null,
"outtmpl" : null,
Expand Down
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries</td>
<td></td>
</tr>
<tr>
<td>Xfolio</td>
<td>https://xfolio.jp/</td>
<td>Series, User Profiles, Works</td>
<td></td>
</tr>
<tr>
<td>xHamster</td>
<td>https://xhamster.com/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@
"wikiart",
"wikifeet",
"wikimedia",
"xfolio",
"xhamster",
"xvideos",
"yiffverse",
Expand Down
146 changes: 146 additions & 0 deletions gallery_dl/extractor/xfolio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://xfolio.jp/"""

from .common import Extractor, Message
from .. import text, exception

# Common URL prefix shared by all xfolio extractor patterns:
# an optional "http(s)://" scheme, the "xfolio.jp" host, and at most one
# extra leading path segment (for example a prefix like "/en" or "/zh-CN").
BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?"


class XfolioExtractor(Extractor):
    """Common settings and helpers shared by all xfolio extractors"""
    category = "xfolio"
    root = "https://xfolio.jp"
    cookies_domain = ".xfolio.jp"
    directory_fmt = ("{category}", "{creator_slug}", "{work_id}")
    filename_fmt = "{work_id}_{image_id}.{extension}"
    archive_fmt = "{work_id}_{image_id}"
    request_interval = (0.5, 1.5)

    def _init(self):
        """Log an error if the 'xfolio_session' cookie is not available"""
        # Rebind the class attribute to the base implementation first, so
        # later '_init' lookups through XfolioExtractor skip this check.
        XfolioExtractor._init = Extractor._init

        has_session = self.cookies_check(("xfolio_session",))
        if not has_session:
            self.log.error("'xfolio_session' cookie required")

    def items(self):
        """Yield a Queue message for every URL returned by works()"""
        # All queued URLs share the same metadata dict, which names
        # XfolioWorkExtractor under the '_extractor' key.
        queue_meta = {"_extractor": XfolioWorkExtractor}
        for work_url in self.works():
            message = (Message.Queue, work_url, queue_meta)
            yield message

    def request(self, url, **kwargs):
        """Send a request and abort if the response is a CAPTCHA page

        Raises exception.StopExtraction when the response URL contains
        '/system/recaptcha'; otherwise returns the response unchanged.
        """
        resp = Extractor.request(self, url, **kwargs)

        if "/system/recaptcha" not in resp.url:
            return resp

        raise exception.StopExtraction("Bot check / CAPTCHA page")


class XfolioWorkExtractor(XfolioExtractor):
    """Extractor for the images of a single xfolio work"""
    subcategory = "work"
    pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)"
    example = "https://xfolio.jp/portfolio/USER/works/12345"
    # Placeholders: root, image ID, work ID
    ref_fmt = "{}/fullscale_image?image_id={}&work_id={}"
    # Placeholders: root, image ID, work ID, image ID
    url_fmt = ("{}/user_asset.php?id={}&work_id={}"
               "&work_image_id={}&type=work_image")

    def items(self):
        """Yield one Directory message followed by a Url message per file"""
        slug, wid = self.groups
        page_url = "{}/portfolio/{}/works/{}".format(self.root, slug, wid)
        page = self.request(page_url).text

        work = self._extract_data(page)
        files = self._extract_files(page, work)
        work["count"] = len(files)

        yield Message.Directory, work

        for num, file in enumerate(files, 1):
            # 'num' is stored in the shared work dict before it gets
            # merged into the per-file metadata.
            work["num"] = num
            file.update(work)
            yield Message.Url, file["url"], file

    def _extract_data(self, html):
        """Build the work-level metadata dict from a work page's HTML"""
        creator, work_id = self.groups
        extr = text.extract_from(html)

        # The extr() calls below are kept in their original sequence.
        # NOTE(review): presumably extract_from() scans forward through
        # the page, making this order significant -- confirm in 'text'.
        og_title = extr('property="og:title" content="', '"')
        og_description = extr('property="og:description" content="', '"')
        creator_id = extr(' data-creator-id="', '"')
        creator_userid = extr(' data-creator-user-id="', '"')
        creator_name = extr(' data-creator-name="', '"')
        creator_profile = extr(' data-creator-profile="', '"')
        series_id = extr("/series/", '"')

        # Keep only the part of og:title before its last " - " separator.
        title = og_title.rpartition(" - ")[0]

        return {
            "title"          : text.unescape(title),
            "description"    : text.unescape(og_description),
            "creator_id"     : creator_id,
            "creator_userid" : creator_userid,
            "creator_name"   : creator_name,
            "creator_profile": text.unescape(creator_profile),
            "series_id"      : series_id,
            "creator_slug"   : creator,
            "work_id"        : work_id,
        }

    def _extract_files(self, html, work):
        """Return a list of file dicts for all images found in 'html'

        Image blocks without a 'fullscale_image' link are skipped
        after logging a warning.
        """
        work_id = work["work_id"]
        root = self.root
        results = []

        blocks = text.extract_iter(
            html, 'class="article__wrap_img', "</div>")
        for block in blocks:
            image_id = text.extr(block, "/fullscale_image?image_id=", "&")

            if image_id:
                file_url = self.url_fmt.format(
                    root, image_id, work_id, image_id)
                referer = self.ref_fmt.format(root, image_id, work_id)
                results.append({
                    "image_id" : image_id,
                    "extension": "jpg",
                    "url"      : file_url,
                    "_http_headers": {"Referer": referer},
                })
            else:
                self.log.warning(
                    "%s: 'fullscale_image' not available", work_id)

        return results


class XfolioUserExtractor(XfolioExtractor):
    """Extractor for the works listed on a user's portfolio"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)"
    example = "https://xfolio.jp/portfolio/USER"

    def works(self):
        """Yield the 'href' of every post item, following 'next' links"""
        page_url = "{}/portfolio/{}/works".format(
            self.root, self.groups[0])

        while True:
            page = self.request(page_url).text

            posts = text.extract_iter(
                page, '<div class="postItem', "</div>")
            for post in posts:
                yield text.extr(post, ' href="', '"')

            # Look for the link inside the pager's "next" element;
            # stop when there is none.
            next_item = text.extr(
                page, ' class="pager__list_next', "</li>")
            next_href = text.extr(next_item, ' href="', '"')
            if next_href:
                page_url = text.unescape(next_href)
            else:
                return


class XfolioSeriesExtractor(XfolioExtractor):
    """Extractor for the works belonging to a series"""
    subcategory = "series"
    pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/series/(\d+)"
    example = "https://xfolio.jp/portfolio/USER/series/12345"

    def works(self):
        """Return a list with the 'href' of every title entry of a series"""
        slug, series_id = self.groups
        page_url = "{}/portfolio/{}/series/{}".format(
            self.root, slug, series_id)
        page = self.request(page_url).text

        hrefs = []
        entries = text.extract_iter(
            page, 'class="listWrap--title">', "</a>")
        for entry in entries:
            hrefs.append(text.extr(entry, ' href="', '"'))
        return hrefs
88 changes: 88 additions & 0 deletions test/results/xfolio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import xfolio
from gallery_dl import exception


# Expected results for the xfolio extractors.
# Keys starting with "#" describe the test itself (input URL, extractor
# class, expected URLs, ...); all other keys are expected metadata values.
__tests__ = (
{
    "#url"     : "https://xfolio.jp/portfolio/yutakashii/works/23977",
    "#class"   : xfolio.XfolioWorkExtractor,
    "#urls"    : (
        "https://xfolio.jp/user_asset.php?id=113179&work_id=23977&work_image_id=113179&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113182&work_id=23977&work_image_id=113182&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113185&work_id=23977&work_image_id=113185&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113188&work_id=23977&work_image_id=113188&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113191&work_id=23977&work_image_id=113191&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113194&work_id=23977&work_image_id=113194&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113197&work_id=23977&work_image_id=113197&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113200&work_id=23977&work_image_id=113200&type=work_image",
        "https://xfolio.jp/user_asset.php?id=113203&work_id=23977&work_image_id=113203&type=work_image",
    ),

    "count"          : 9,
    # The 9 files are numbered 1..9; range() excludes its stop value,
    # so the upper bound has to be 10 to include the last file.
    "num"            : range(1, 10),
    "creator_id"     : "1495",
    "creator_name"   : "香椎ゆたか",
    "creator_profile": "連載中:「いつまでも可愛くしてると思うなよ!」 https://booklive.jp/product/index/title_id/10003104/vol_no/001\r\n 過去作:「まじとら!」「男友達ガール」\r\npixiv:http://pixiv.me/yutakashii\r\nskeb:http://skeb.jp/@yutakashii",
    "creator_slug"   : "yutakashii",
    "creator_userid" : "3778",
    "description"    : "BookLive NINOにて「男友達ガール」連載開始しました。ルームシェア+TSFで、ある日突然同居人が可愛い女の子になったら…という感じのラブ(?)コメディ...",
    "extension"      : "jpg",
    "image_id"       : r"re:113\d\d\d",
    "series_id"      : "",
    "title"          : "新連載「男友達ガール」冒頭試し読み",
    "url"            : str,
    "work_id"        : "23977",
},

{
    "#url"     : "https://xfolio.jp/portfolio/yutakashii",
    "#class"   : xfolio.XfolioUserExtractor,
    "#pattern" : xfolio.XfolioWorkExtractor.pattern,
    "#count"   : range(50, 100),
},

{
    "#url"     : "https://xfolio.jp/portfolio/yutakashii/works",
    "#class"   : xfolio.XfolioUserExtractor,
},
{
    "#url"     : "https://xfolio.jp/portfolio/yutakashii/works?page=3",
    "#class"   : xfolio.XfolioUserExtractor,
},
{
    "#url"     : "https://xfolio.jp/en/portfolio/yutakashii",
    "#class"   : xfolio.XfolioUserExtractor,
},
{
    "#url"     : "https://xfolio.jp/ko/portfolio/yutakashii",
    "#class"   : xfolio.XfolioUserExtractor,
},
{
    "#url"     : "https://xfolio.jp/zh-CN/portfolio/yutakashii",
    "#class"   : xfolio.XfolioUserExtractor,
},

{
    "#url"     : "https://xfolio.jp/portfolio/donguri/series/1391402",
    "#class"   : xfolio.XfolioSeriesExtractor,
    "#auth"    : False,
    "#exception": exception.StopExtraction,
},

{
    "#url"     : "https://xfolio.jp/portfolio/donguri/series/1391402",
    "#class"   : xfolio.XfolioSeriesExtractor,
    "#auth"    : True,
    "#urls"    : (
        "https://xfolio.jp/portfolio/donguri/works/2472402",
        "https://xfolio.jp/portfolio/donguri/works/2470700",
    ),
},

)

0 comments on commit 438c616

Please sign in to comment.