NOTE(review): this patch was recovered from a copy in which line breaks and every
"<...>" span had been stripped (generic type arguments, the pom.xml tags, and the
"<div", "<!json>" and "<!>" string literals in VkRipper). Those parts are
reconstructions -- confirm them against the original commit, in particular the
trailing "<properties>" context line of the pom.xml hunk. VkRipper.java also differs
from the original commit (logger fix, redundant Set guard removed, Javadoc added), so
the "index" hash of that file still names the original blob.

diff --git a/.gitignore b/.gitignore
index 4580c788a..e9c75df0d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@ ripme.log
 rips/
 .history
 ripme.jar.update
+*.swp
+ripme.jar
diff --git a/pom.xml b/pom.xml
index 734a1b28c..e838effd5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.0.2</version>
+  <version>1.0.3</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
   <properties>
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java
new file mode 100644
index 000000000..efcadaf71
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java
@@ -0,0 +1,178 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractRipper;
+
+/** Ripper for vk.com photo albums (URLs of the form vk.com/album#### or vk.com/photos####). */
+public class VkRipper extends AbstractRipper {
+
+    private static final String DOMAIN = "vk.com",
+                                HOST   = "vk";
+    // Must name this class; logging under SeeniveRipper misattributed every message.
+    private static final Logger logger = Logger.getLogger(VkRipper.class);
+
+    public VkRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        return url.getHost().endsWith(DOMAIN);
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    /**
+     * Requests the album page in batches (POST with an increasing offset), maps every
+     * photo ID found in the returned anchors to an image URL and queues it for download.
+     * Stops when a response lacks any div markup or yields fewer than 40 anchors.
+     */
+    @Override
+    public void rip() throws IOException {
+        Map<String, String> photoIDsToURLs = new HashMap<String, String>();
+        int offset = 0;
+        while (true) {
+            logger.info(" Retrieving " + this.url);
+
+            // al=1&offset=80&part=1
+            Map<String, String> postData = new HashMap<String, String>();
+            postData.put("al", "1");
+            postData.put("offset", Integer.toString(offset));
+            postData.put("part", "1");
+            Document doc = Jsoup.connect(this.url.toExternalForm())
+                    .header("Referer", this.url.toExternalForm())
+                    .ignoreContentType(true)
+                    .userAgent(USER_AGENT)
+                    .timeout(5000)
+                    .data(postData)
+                    .post();
+
+            String body = doc.toString();
+            if (!body.contains("<div")) {
+                break;
+            }
+            body = body.substring(body.indexOf("<div"));
+            doc = Jsoup.parseBodyFragment(body);
+            List<Element> elements = doc.select("a");
+            Set<String> photoIDsToGet = new HashSet<String>();
+            for (Element a : elements) {
+                if (!a.attr("onclick").contains("showPhoto('")) {
+                    logger.error("a: " + a);
+                    continue;
+                }
+                String photoID = a.attr("onclick");
+                photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length());
+                photoID = photoID.substring(0, photoID.indexOf("'"));
+                // A Set already ignores duplicates, so no contains() guard is needed.
+                photoIDsToGet.add(photoID);
+            }
+            for (String photoID : photoIDsToGet) {
+                if (!photoIDsToURLs.containsKey(photoID)) {
+                    try {
+                        photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID));
+                    } catch (IOException e) {
+                        logger.error("Exception while retrieving photo id " + photoID, e);
+                        continue;
+                    }
+                }
+                if (!photoIDsToURLs.containsKey(photoID)) {
+                    logger.error("Could not find URL for photo ID: " + photoID);
+                    continue;
+                }
+                String url = photoIDsToURLs.get(photoID);
+                addURLToDownload(new URL(url));
+            }
+            logger.info("Received " + elements.size() + " elements");
+            if (elements.size() < 40) {
+                break;
+            }
+            offset += elements.size();
+        }
+        waitForThreads();
+    }
+
+    /**
+     * Asks al_photos.php about one photo and returns the id-to-URL pairs found in the
+     * JSON array embedded in the response, preferring z_src, then y_src, then x_src.
+     *
+     * @param photoID photo identifier taken from a showPhoto('...') onclick handler
+     * @return map from photo ID to image URL; entries without any *_src key are omitted
+     * @throws IOException if the request fails
+     */
+    private Map<String, String> getPhotoIDsToURLs(String photoID) throws IOException {
+        Map<String, String> photoIDsToURLs = new HashMap<String, String>();
+        Map<String, String> postData = new HashMap<String, String>();
+        // act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
+        postData.put("list", getGID(this.url));
+        postData.put("act", "show");
+        postData.put("al", "1");
+        postData.put("module", "photos");
+        postData.put("photo", photoID);
+        Document doc = Jsoup
+                .connect("https://vk.com/al_photos.php")
+                .header("Referer", this.url.toExternalForm())
+                .ignoreContentType(true)
+                .userAgent(USER_AGENT)
+                .timeout(5000)
+                .data(postData)
+                .post();
+        String jsonString = doc.toString();
+        jsonString = jsonString.substring(jsonString.indexOf("<!json>") + "<!json>".length());
+        jsonString = jsonString.substring(0, jsonString.indexOf("<!>"));
+        JSONArray json = new JSONArray(jsonString);
+        for (int i = 0; i < json.length(); i++) {
+            JSONObject jsonImage = json.getJSONObject(i);
+            for (String key : new String[] {"z_src", "y_src", "x_src"}) {
+                if (!jsonImage.has(key)) {
+                    continue;
+                }
+                photoIDsToURLs.put(jsonImage.getString("id"), jsonImage.getString(key));
+                break;
+            }
+        }
+        return photoIDsToURLs;
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Builds the album identifier from the URL: the path keyword ("photos" or "album")
+     * followed by the ID characters after it.
+     *
+     * @throws MalformedURLException if the URL is not a vk.com album/photos URL
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album)([a-zA-Z0-9_]{1,}).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (!m.matches()) {
+            throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####");
+        }
+        int count = m.groupCount();
+        return m.group(count - 1) + m.group(count);
+    }
+
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VkRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VkRipperTest.java
new file mode 100644
index 000000000..6b782eafd
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VkRipperTest.java
@@ -0,0 +1,33 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rarchives.ripme.ripper.rippers.VkRipper;
+
+public class VkRipperTest extends RippersTest {
+
+    public void testVkAlbum() throws IOException {
+        if (!DOWNLOAD_CONTENT) {
+            return;
+        }
+        List<URL> contentURLs = new ArrayList<URL>();
+        contentURLs.add(new URL("https://vk.com/album45506334_172415053"));
+        //contentURLs.add(new URL("https://vk.com/album45506334_0"));
+        //contentURLs.add(new URL("https://vk.com/photos45506334"));
+        for (URL url : contentURLs) {
+            try {
+                VkRipper ripper = new VkRipper(url);
+                ripper.rip();
+                assert(ripper.getWorkingDir().listFiles().length > 1);
+                deleteDir(ripper.getWorkingDir());
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Error while ripping URL " + url + ": " + e.getMessage());
+            }
+        }
+    }
+
+}