Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xhamster single picture ripping capability #357

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
37 changes: 21 additions & 16 deletions src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,9 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str
return false;
}
logger.debug("url: " + url + ", prefix: " + prefix + ", subdirectory" + subdirectory + ", referrer: " + referrer + ", cookies: " + cookies);
String saveAs = url.toExternalForm();
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
File saveFileAs;
try {
if (!subdirectory.equals("")) {
subdirectory = File.separator + subdirectory;
}
saveFileAs = new File(
workingDir.getCanonicalPath()
+ subdirectory
+ File.separator
+ prefix
+ saveAs);
saveFileAs = getSaveAsFile(url, prefix, subdirectory);
} catch (IOException e) {
logger.error("[!] Error creating save file path for URL '" + url + "':", e);
return false;
Expand All @@ -134,7 +120,26 @@ public boolean addURLToDownload(URL url, String prefix, String subdirectory, Str
}
return addURLToDownload(url, saveFileAs, referrer, cookies);
}


/**
 * Builds the local file a remote URL should be saved as.
 *
 * The file name is the part of the URL after its last '/', cut at the first
 * '?', '#', '&' or ':' it contains. The result lives under the canonical path
 * of workingDir, inside subdirectory when one is given, and the file name is
 * preceded by prefix.
 *
 * @param url          remote resource whose last path segment names the file
 * @param prefix       text placed directly in front of the file name
 * @param subdirectory directory below the working directory, or "" for none
 * @return the destination file (nothing is created on disk here)
 * @throws IOException if the canonical path of the working directory cannot be resolved
 */
protected File getSaveAsFile(URL url, String prefix, String subdirectory) throws IOException {
    String external = url.toExternalForm();
    String fileName = external.substring(external.lastIndexOf('/') + 1);
    // Truncate at each delimiter in turn, which leaves the text before the earliest one.
    for (char delimiter : new char[] {'?', '#', '&', ':'}) {
        int cut = fileName.indexOf(delimiter);
        if (cut >= 0) {
            fileName = fileName.substring(0, cut);
        }
    }
    String subdirectoryPart = subdirectory.isEmpty() ? "" : File.separator + subdirectory;
    String directory = workingDir.getCanonicalPath() + subdirectoryPart;
    return new File(directory + File.separator + prefix + fileName);
}

/**
* Queues file to be downloaded and saved. With options.
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<Strin
return true;
}

/**
 * Queues a URL for download, deriving the destination file from the URL,
 * prefix and subdirectory via getSaveAsFile.
 *
 * @return false when the destination path could not be built; otherwise the
 *         result of the File-based addURLToDownload overload
 */
@Override
public boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies) {
    final File target;
    try {
        target = getSaveAsFile(url, prefix, subdirectory);
    } catch (IOException ioe) {
        // Without a destination path there is nothing to queue.
        logger.error("[!] Error creating save file path for URL '" + url + "':", ioe);
        return false;
    }
    return addURLToDownload(url, target, referrer, cookies);
}

@Override
public boolean addURLToDownload(URL url, File saveAs) {
return addURLToDownload(url, saveAs, null, null);
Expand Down
137 changes: 118 additions & 19 deletions src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -17,14 +21,17 @@ public class XhamsterRipper extends AlbumRipper {

private static final String HOST = "xhamster";

private static Pattern xhPattern = Pattern.compile("^https?://[a-z.]*" + HOST + "\\.com/photos/(?:gallery/([0-9]+).*|view/([0-9]+)-([0-9]+)\\.html(?:.*)?)$");

private HashMap<String, Document> docs = new HashMap<String, Document>();

public XhamsterRipper(URL url) throws IOException {
super(url);
}

@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/[0-9]+.*$");
Matcher m = p.matcher(url.toExternalForm());
Matcher m = xhPattern.matcher(url.toExternalForm());
return m.matches();
}

Expand All @@ -35,22 +42,41 @@ public URL sanitizeURL(URL url) throws MalformedURLException {

/**
 * Rips the configured URL: a whole gallery when the URL is a gallery link,
 * otherwise the single photo page it points at.
 */
@Override
public void rip() throws IOException {
    if (!isGallery(url)) {
        ripPhoto();
        return;
    }
    ripGallery();
}

/**
 * Tells whether a URL is a gallery link rather than a single-photo link.
 *
 * @param url URL to classify
 * @return true when the URL matches xhPattern and its third capture group
 *         (only populated by the "view/" alternative) is absent or empty;
 *         false for single-photo URLs and for URLs that do not match at all
 */
private static boolean isGallery(URL url) {
    Matcher matcher = xhPattern.matcher(url.toExternalForm());
    if (matcher.matches()) {
        String photoPart = matcher.group(3);
        return photoPart == null || photoPart.isEmpty(); // Is a gallery.
    }
    return false;
}

/**
 * Rips a single photo page: fetches the page, queues every image matching
 * "img#imgSized" for download, then waits for the download threads.
 *
 * @throws IOException if the page cannot be fetched or an image URL is malformed
 */
private void ripPhoto() throws IOException {
    Document page = downloadAndSaveHTML(url);
    for (Element img : page.select("img#imgSized")) {
        URL imageUrl = new URL(cleanImageSrc(img.attr("src")));
        // The photo page itself is sent as the referrer for the image request.
        addURLToDownload(imageUrl, "", "", url.toExternalForm(), Utils.getCookies(HOST));
    }
    waitForThreads();
}

private void ripGallery() throws IOException {
int index = 0;
String nextURL = this.url.toExternalForm();
String nextURL = url.toExternalForm();
while (nextURL != null) {
logger.info(" Retrieving " + nextURL);
Document doc = Http.url(nextURL).get();
Document doc = downloadAndSaveHTML(new URL(nextURL));
for (Element thumb : doc.select("table.iListing div.img img")) {
if (!thumb.hasAttr("src")) {
continue;
}
String image = thumb.attr("src");
image = image.replaceAll(
"http://p[0-9]*\\.",
"http://up.");
image = image.replaceAll(
"_160\\.",
"_1000.");
String image = cleanImageSrc(thumb.attr("src"));
index += 1;
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
Expand All @@ -73,22 +99,95 @@ public void rip() throws IOException {
waitForThreads();
}

/**
 * Rewrites an image source URL taken from a page into the URL to download.
 *
 * Two substitutions are applied: a host prefix of the form "p" followed by
 * digits becomes "up", keeping the original scheme, and "_160." becomes
 * "_1000." (presumably thumbnail size to full size; confirm against the site).
 *
 * @param imageSrc the "src" attribute value of an image element
 * @return the rewritten URL string
 */
private String cleanImageSrc(String imageSrc) {
    // Capture the scheme and re-emit it with $1. A replacement string is not
    // a regex, so "https?://up." would be inserted literally and break the URL.
    imageSrc = imageSrc.replaceAll("(https?)://p[0-9]*\\.", "$1://up.");
    imageSrc = imageSrc.replaceAll("_160\\.", "_1000.");
    return imageSrc;
}

/**
 * Returns the parsed page for a URL, fetching it at most once per ripper
 * instance, and saves a copy of the HTML into the working directory.
 *
 * Pages are cached in docs by their external form. The copy is written on
 * every call in which a working directory is available, so a page fetched
 * before the working directory existed is still saved by a later call.
 *
 * @param url page to fetch
 * @return the parsed document
 * @throws IOException if the page cannot be fetched or the copy cannot be written
 */
private Document downloadAndSaveHTML(URL url) throws IOException {
    String urlString = url.toExternalForm();
    Document doc = docs.get(urlString);
    if (doc == null) {
        doc = Http.url(url).header("User-Agent", USER_AGENT).referrer(url).cookies(Utils.getCookies(HOST)).get();
        docs.put(urlString, doc);
    }
    if (getWorkingDir() != null) {
        String target = getWorkingDir().getCanonicalPath() + File.separator + urlToFilename(url);
        // Encode with the document's own output charset rather than the
        // platform default, so the bytes do not depend on the machine.
        // NOTE(review): assumes jsoup's outputSettings().charset() is the
        // charset used by toString() - confirm for the bundled jsoup version.
        Files.write(Paths.get(target), doc.toString().getBytes(doc.outputSettings().charset()));
    }
    return doc;
}

/**
 * Derives a local file name from a URL.
 *
 * Everything up to the last '/' is dropped, then everything from the first
 * '#', '&' or ':' onward. If a query string remains, it is folded into the
 * name ('=' becomes '-', '&' becomes '_') and placed before the extension,
 * e.g. "123-456.html?page=2" becomes "123-456_page-2.html". When the part
 * before '?' has no extension, the folded query is appended instead.
 *
 * @param url URL to convert
 * @return a file name that no longer contains '?'
 */
private static String urlToFilename(URL url) {
    String filename = url.toExternalForm().replaceFirst("^https?://.*/", "").replaceFirst("[#&:].*$", "");
    int questionMarkIdx = filename.indexOf('?');
    if (questionMarkIdx < 0) {
        return filename;
    }
    String path = filename.substring(0, questionMarkIdx);
    String params = filename.substring(questionMarkIdx + 1).replace('=', '-').replace('&', '_');
    // Look for the extension in the path part only. A '.' inside the query
    // (e.g. "?v=1.5") would otherwise give an index past the '?' and make
    // substring() throw StringIndexOutOfBoundsException.
    int periodIdx = path.lastIndexOf('.');
    if (periodIdx < 0) {
        return path + "_" + params;
    }
    return path.substring(0, periodIdx) + "_" + params + path.substring(periodIdx);
}

/**
 * Builds the album title for a gallery or single-photo URL.
 *
 * The title starts with HOST and "_", followed by the uploader's name when
 * it can be read from the page, followed by an identifier derived from the
 * gallery link. For a single-photo URL the gallery link is taken from the
 * first usable anchor inside "#viewBox"; otherwise the URL itself is used.
 * If the page cannot be fetched, the GID of the URL is appended instead of
 * the uploader's name.
 *
 * @param url gallery or photo URL
 * @return the album title
 * @throws MalformedURLException if the page cannot be fetched and getGID rejects the URL
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    String title = HOST + "_";
    Document doc = null;
    try {
        doc = downloadAndSaveHTML(url);
    } catch (IOException e) {
        logger.error("Exception retrieving url=" + url + ": " + e.getMessage());
        title += getGID(url);
    }
    if (doc != null) {
        // Find username.
        Element link = doc.select("#galleryUser .item a").first();
        if (link != null) {
            title += link.text() + "_";
        } else {
            logger.warn("No username was found in the contents of url=" + url);
        }
    } else {
        logger.warn("No username could be retrieved for url=" + url);
    }
    String galleryLink = url.toExternalForm();
    if (!isGallery(url) && doc != null) {
        for (Element link : doc.select("#viewBox a")) {
            if (link != null) {
                String href = link.attr("href");
                // Skip empty hrefs and in-page anchors; the first real link wins.
                if (href.length() > 0 && !href.startsWith("#")) {
                    galleryLink = href;
                    break;
                }
            }
        }
        // Compare contents, not references: toExternalForm() builds a new
        // String on each call, so "==" never reported a missing link.
        if (galleryLink.equals(url.toExternalForm())) {
            logger.warn("No gallery title link was found for url=" + url);
        }
    }
    title += galleryLink
            .replaceFirst("^http.*/photos/(?:gallery/([^?#:&]+)|view/([^-]+)-).*$", "$1$2")
            .replace('/', '-')
            .replace(".html", "");
    return title;
}

/**
 * Returns the host identifier of this ripper, i.e. the HOST constant.
 */
@Override
public String getHost() {
return HOST;
}

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://([a-z0-9.]*?)xhamster\\.com/photos/gallery/([0-9]{1,})/.*\\.html");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(2);
}
throw new MalformedURLException(
String gid = url.toExternalForm().replaceFirst("^https?://(?:[a-z0-9.]*?)" + HOST + "\\.com/photos/(?:gallery/([0-9]{1,})/.*\\.html|view/([^-]+)-).*$", "$1$2");
if (gid.length() == 0) {
throw new MalformedURLException(
"Expected xhamster.com gallery formats: "
+ "xhamster.com/photos/gallery/#####/xxxxx..html"
+ " Got: " + url);
+ "http://xhamster.com/photos/gallery/#####/xxxxx..html or http://xhamster.com/photos/view/####-####.html"
+ " Got: " + url);
}
return gid;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.VideoRipper;
Expand Down Expand Up @@ -39,28 +41,50 @@ public URL sanitizeURL(URL url) throws MalformedURLException {

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://.*xhamster\\.com/movies/([0-9]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}

throw new MalformedURLException(
String gid = url.toExternalForm().replaceFirst("^https?://.*" + HOST + "\\.com/movies/([0-9]+)/.*$", "$1");
if (gid.length() == 0) {
throw new MalformedURLException(
"Expected xhamster format:"
+ "xhamster.com/movies/####"
+ " Got: " + url);
+ "xhamster.com/movies/####"
+ " Got: " + url);

}
return gid;
}

@Override
public void rip() throws IOException {
logger.info("Retrieving " + this.url);
Document doc = Http.url(url).get();
logger.info("Retrieving " + url);
Document doc = Http.url(url).header("User-Agent", USER_AGENT).referrer("http://" + HOST + ".com/").cookies(Utils.getCookies(HOST)).get();
Elements videos = doc.select("a.mp4Thumb");
if (videos.size() == 0) {
throw new IOException("Could not find Embed code at " + url);
}
String vidUrl = videos.attr("href");
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
addURLToDownload(new URL(vidUrl), getVideoName(), "", url.toExternalForm(), Utils.getCookies(HOST));
waitForThreads();
}
}

/**
 * Builds the file-name prefix used for the downloaded video.
 *
 * The name starts with HOST and "_", followed by the uploader's name when it
 * can be read from "#videoUser a" on the page, followed by the numeric id and
 * title slug taken from the URL. If the page cannot be fetched, the GID of
 * the URL is appended instead of the uploader's name.
 *
 * @return the video name
 * @throws IOException if the page cannot be fetched and the URL has no valid GID
 */
private String getVideoName() throws IOException {
    String title = HOST + "_";
    try {
        Document doc = Http.url(url).header("User-Agent", USER_AGENT).referrer(url).cookies(Utils.getCookies(HOST)).get();
        Element link = doc.select("#videoUser a").first();
        if (link != null) {
            title += link.text() + "_";
        }
    } catch (IOException e) {
        logger.error("Exception retrieving url=" + url + ": " + e.getMessage());
        try {
            title += getGID(url);
        } catch (MalformedURLException malformedEx) {
            // Keep the original exception as the cause so its stack trace survives.
            throw new IOException(malformedEx.getMessage(), malformedEx);
        }
    }
    title += url.toExternalForm()
            .replaceFirst("^https?://.*" + HOST + "\\.com/movies/([0-9]+)/([^\\.]+).*$", "$1_$2_")
            // Collapse runs of underscores produced by the pieces joined above.
            .replaceAll("_+", "_");
    return title;
}

}
24 changes: 24 additions & 0 deletions src/main/java/com/rarchives/ripme/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

Expand Down Expand Up @@ -71,6 +73,11 @@ public class Utils {
}
}

private static HashMap<String, HashMap<String, String>> cookieCache;
static {
cookieCache = new HashMap<String, HashMap<String, String>>();
}

/**
* Get the root rips directory.
* @return
Expand Down Expand Up @@ -387,4 +394,21 @@ public static List<String> between(String fullText, String start, String finish)
}
return result;
}

/**
 * Returns the cookies configured for a host.
 *
 * The configuration value "cookies.&lt;host&gt;" is read as space-separated
 * name=value pairs; entries without '=' are ignored and a value may itself
 * contain '='. The parsed map is cached per host, so the configuration is
 * read only on the first call for each host.
 *
 * @param host host name used both as cache key and configuration key suffix
 * @return the cookie map for the host (empty when nothing is configured)
 */
public static Map<String, String> getCookies(String host) {
    HashMap<String, String> cached = cookieCache.get(host);
    if (cached != null) {
        return cached;
    }
    HashMap<String, String> parsed = new HashMap<String, String>();
    String[] entries = getConfigString("cookies." + host, "").split(" ");
    for (String entry : entries) {
        String trimmed = entry.trim();
        int separator = trimmed.indexOf('=');
        if (separator < 0) {
            continue;
        }
        // Split on the first '=' only; anything after it belongs to the value.
        parsed.put(trimmed.substring(0, separator), trimmed.substring(separator + 1));
    }
    cookieCache.put(host, parsed);
    return parsed;
}
}
4 changes: 4 additions & 0 deletions src/main/resources/rip.properties
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ twitter.max_requests = 10
clipboard.autorip = false

download.save_order = true

cookies.xhamster =
# e.g. cookies.xhamster = USERNAME=sleaze UID=69696969 PWD=144354bc90792a91957df1ef962908c1 fingerprint=d65f704a8fef31b5327175e00f1eeb85

Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public void testImagevenueRip() throws IOException {
}

public void testImgboxRip() throws IOException {
AbstractRipper ripper = new ImgboxRipper(new URL("http://imgbox.com/g/sEMHfsqx4w"));
AbstractRipper ripper = new ImgboxRipper(new URL("http://imgbox.com/g/z7Bj2FjxJX"));
testRipper(ripper);
}

Expand Down