From 105b59abf87e67b25a5661c0cb8414d5e6d1d1b5 Mon Sep 17 00:00:00 2001 From: Louis Vallat Date: Wed, 24 Apr 2019 10:39:55 +0200 Subject: [PATCH] Added reddit extractor and subreddit objects it can now parse and extract everything we need from Reddit and create Reddit post objects that correspond to the given object. --- .gitignore | 3 +- .../nbproject/project.properties | 2 +- .../Twitter_techsupportgore_bot.java | 65 +------ .../reddit_handler/RedditExtractor.java | 181 ++++++++++++++++++ .../reddit_handler/RedditPost.java | 61 ++++-- .../reddit_handler/RedditPostImage.java | 4 +- .../reddit_handler/RedditPostLink.java | 4 +- .../reddit_handler/RedditPostText.java | 4 +- .../reddit_handler/RedditPostVideo.java | 4 +- .../reddit_handler/SubReddit.java | 108 +++++++++++ 10 files changed, 359 insertions(+), 77 deletions(-) create mode 100644 twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditExtractor.java create mode 100644 twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/SubReddit.java diff --git a/.gitignore b/.gitignore index 1ee417d..ef4cf72 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,5 @@ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* /twitter_techsupportgore_bot/nbproject/private/ -/twitter_techsupportgore_bot/build/ \ No newline at end of file +/twitter_techsupportgore_bot/build/ +/twitter_techsupportgore_bot/dist/ \ No newline at end of file diff --git a/twitter_techsupportgore_bot/nbproject/project.properties b/twitter_techsupportgore_bot/nbproject/project.properties index c2c6b06..f161eb4 100644 --- a/twitter_techsupportgore_bot/nbproject/project.properties +++ b/twitter_techsupportgore_bot/nbproject/project.properties @@ -36,7 +36,7 @@ jar.compress=false javac.classpath=\ ${file.reference.gson-2.8.5.jar} # Space-separated list of extra javac options -javac.compilerargs= +javac.compilerargs=-Xlint:unchecked javac.deprecation=false 
javac.external.vm=true javac.processorpath=\ diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/Twitter_techsupportgore_bot.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/Twitter_techsupportgore_bot.java index ce6a3ea..c155a19 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/Twitter_techsupportgore_bot.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/Twitter_techsupportgore_bot.java @@ -26,8 +26,10 @@ import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.ProtocolException; import java.net.URL; -import twitter_techsupportgore_bot.reddit_handler.RedditPost; -import twitter_techsupportgore_bot.reddit_handler.RedditPostLink; +import java.util.Collection; +import java.util.TreeMap; +import java.util.TreeSet; +import twitter_techsupportgore_bot.reddit_handler.*; /** * This is where everything begins. @@ -48,59 +50,10 @@ public class Twitter_techsupportgore_bot { public static void main(String[] args) throws MalformedURLException, ProtocolException, IOException { - String url = "https://www.reddit.com/r/techsupportgore/new.json?limit=75"; - - /* - try { - - URL myurl = new URL(url); - con = (HttpURLConnection) myurl.openConnection(); - - con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", "Mozilla 5.0 (Windows; U; " - + "Windows NT 5.1; en-US; rv:1.8.0.11) "); - - StringBuilder content; - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(con.getInputStream()))) { - - String line; - content = new StringBuilder(); - - while ((line = in.readLine()) != null) { - content.append(line); - content.append(System.lineSeparator()); - } - - } - JsonObject objet = new JsonParser().parse(content.toString()).getAsJsonObject(); - JsonObject data = new JsonParser().parse(objet.get("data").toString()).getAsJsonObject(); - JsonArray children = new JsonParser().parse(data.get("children").toString()).getAsJsonArray(); - - for (int i = 
0; i < children.size(); i++) { - JsonObject child = new JsonParser().parse(children.get(i).toString()).getAsJsonObject(); - JsonObject childData = new JsonParser().parse(child.get("data").toString()).getAsJsonObject(); - System.out.println("Title: " + childData.get("title").toString()); - System.out.println("From: " + childData.get("author").toString()); - System.out.println("Is crosspostable ? : " + !childData.get("is_crosspostable").getAsBoolean()); - System.out.println("Is a video ? : " + childData.get("is_video").getAsBoolean()); - System.out.println("Is mature ? : " + childData.get("over_18").getAsBoolean()); - System.out.println("Score : " + childData.get("score").getAsDouble()); - System.out.println("Post url : " + childData.get("url").getAsString()); - JsonObject preview = new JsonParser().parse(childData.get("preview").toString()).getAsJsonObject(); - JsonArray previewImages = new JsonParser().parse(preview.get("images").toString()).getAsJsonArray(); - JsonObject source = new JsonParser().parse(previewImages.get(0).toString()).getAsJsonObject(); - JsonObject urlSrc = new JsonParser().parse(source.get("source").toString()).getAsJsonObject(); - System.out.println("Source url : " + urlSrc.get("url").toString().replace("amp;", "")); - System.out.println("-----------------"); - } - - } finally { - - con.disconnect(); + TreeSet postsIndexed = new TreeSet<>(); + RedditExtractor red = new RedditExtractor("techsupportgore"); + for (RedditPost r : red.getRedditPosts()) { + System.out.println(r.getTitle()); } - - */ } -} +} \ No newline at end of file diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditExtractor.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditExtractor.java new file mode 100644 index 0000000..1ddfc24 --- /dev/null +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditExtractor.java @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2019 louis + * + 
* This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package twitter_techsupportgore_bot.reddit_handler; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.ProtocolException; +import java.net.URL; +import java.util.HashSet; + +/** + * Reddit extractor object. + * + * @author louis + */ +public final class RedditExtractor { + + /** + * Subreddit to extract infos from. + */ + private final SubReddit sub; + + /** + * Main Constructor. + * + * @param subreddit Subreddit name. Just after /r/ + * @throws IOException + */ + public RedditExtractor(String subreddit) throws IOException { + if (!doesSubredditExists(subreddit)) { + throw new MalformedURLException("This subreddit (" + subreddit + ") does not exist."); + } else { + this.sub = new SubReddit(subreddit); + } + } + + /** + * Check if a subreddit exists. 
+ * + * TODO: FIND A BETTER WAY TO DO THAT + * + * @param subredditName the subreddit's name, as it appears just after /r/ + * @return true if the HEAD request is answered with a status code below 400 + * @throws MalformedURLException + * @throws ProtocolException + * @throws IOException + */ + public boolean doesSubredditExists(String subredditName) + throws MalformedURLException, IOException { + String urlToTest = "https://www.reddit.com/r/" + subredditName + "/"; + HttpURLConnection huc = (HttpURLConnection) (new URL(urlToTest).openConnection()); + huc.setRequestProperty("User-Agent", "Mozilla 5.0 (Windows; U; " + + "Windows NT 5.1; en-US; rv:1.8.0.11) "); + huc.setRequestMethod("HEAD"); + huc.connect(); + + int respCode = huc.getResponseCode(); + return respCode < 400; + } + + /** + * Obtain the subreddit JSON response. + * + * @return the JSON from the REDDIT api. + * @throws MalformedURLException + * @throws ProtocolException + * @throws IOException + */ + public String getSubredditJson() throws MalformedURLException, ProtocolException, IOException { + HttpURLConnection con; + URL myurl = new URL(this.sub.getJsonURL()); + con = (HttpURLConnection) myurl.openConnection(); + try { + con.setRequestMethod("GET"); + con.setRequestProperty("User-Agent", "Mozilla 5.0 (Windows; U; " + + "Windows NT 5.1; en-US; rv:1.8.0.11) "); + StringBuilder response; + try (BufferedReader in = new BufferedReader( + new InputStreamReader(con.getInputStream()))) { + String line; + response = new StringBuilder(); + while ((line = in.readLine()) != null) { + response.append(line); + response.append(System.lineSeparator()); + } + return response.toString(); + } + } finally { + con.disconnect(); + } + } + + /** + * Get Reddit's subreddit posts. + * + * @return a hash set of all the Reddit posts parsed. 
+ * @throws java.net.ProtocolException + * @throws java.net.MalformedURLException + */ + public HashSet getRedditPosts() throws ProtocolException, IOException, MalformedURLException { + HashSet set = new HashSet<>(); + String jsonResponse = getSubredditJson(); + JsonObject objet = new JsonParser().parse(jsonResponse).getAsJsonObject(); + JsonObject data = new JsonParser().parse(objet.get("data").toString()).getAsJsonObject(); + JsonArray children = new JsonParser().parse(data.get("children").toString()).getAsJsonArray(); + for (int i = 0; i < children.size(); i++) { + JsonObject child = new JsonParser().parse(children.get(i).toString()).getAsJsonObject(); + JsonObject childData = new JsonParser().parse(child.get("data").toString()).getAsJsonObject(); + if (childData.get("id").toString() != null + && !childData.get("quarantine").getAsBoolean() + && childData.get("url").getAsString() != null) { + String id = childData.get("id").toString(); + String title = childData.get("title") != null + ? childData.get("title").toString() : this.sub.getName(); + title = title.replaceAll("\"", "").replace("\\", "\""); + String author = childData.get("author") != null + ? 
childData.get("author").toString() : "anonymous"; + boolean quarantine = childData.get("quarantine").getAsBoolean(); + double score = childData.get("score").getAsDouble(); + String postHint = childData.get("post_hint").getAsString(); + boolean crosspostable = !childData.get("is_crosspostable").getAsBoolean(); + boolean over18 = childData.get("over_18").getAsBoolean(); + String url; + try { + JsonObject preview = new JsonParser().parse(childData.get("preview").toString()).getAsJsonObject(); + JsonArray previewImages = new JsonParser().parse(preview.get("images").toString()).getAsJsonArray(); + JsonObject source = new JsonParser().parse(previewImages.get(0).toString()).getAsJsonObject(); + JsonObject urlSrc = new JsonParser().parse(source.get("source").toString()).getAsJsonObject(); + url = urlSrc.get("url").toString().replace("amp;", ""); + } catch (NullPointerException n) { + url = childData.get("url").getAsString(); + } + String permalink = childData.get("url").getAsString(); + boolean spoiler = childData.get("spoiler").getAsBoolean(); + + if (postHint.contains("video")) { + set.add(new RedditPostVideo( + id, title, quarantine, score, postHint, + crosspostable, over18, author, + permalink, spoiler, url)); + } else if ("link".equals(postHint)) { + set.add(new RedditPostLink( + id, title, quarantine, score, postHint, + crosspostable, over18, author, + permalink, spoiler, url)); + } else if ("text".equals(postHint)) { + set.add(new RedditPostText( + id, title, quarantine, score, postHint, + crosspostable, over18, author, + permalink, spoiler, url)); + } else if ("image".equals(postHint)) { + set.add(new RedditPostImage( + id, title, quarantine, score, postHint, + crosspostable, over18, author, + permalink, spoiler, url)); + } + } + } + return set; + } +} diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPost.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPost.java index 
9a4ae6e..a5250fa 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPost.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPost.java @@ -27,7 +27,7 @@ public abstract class RedditPost { * Post's id. */ protected String postId; - + /** * Post's title. */ @@ -73,8 +73,14 @@ public abstract class RedditPost { */ protected boolean spoiler; + /** + * Post's media url. + */ + protected String url; + /** * Main constructor for a Reddit post. + * * @param id post's id * @param title post's title * @param quarantine is this post in quarantine? @@ -85,8 +91,9 @@ public abstract class RedditPost { * @param author post's author * @param permalink post's permalink * @param spoiler is this post a spoiler? + * @param url post's media url */ - public RedditPost(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, boolean over18, String author, String permalink, boolean spoiler) { + public RedditPost(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, boolean over18, String author, String permalink, boolean spoiler, String url) { this.postId = id; this.title = title; this.quarantine = quarantine; @@ -97,10 +104,28 @@ public abstract class RedditPost { this.author = author; this.permalink = permalink; this.spoiler = spoiler; + this.url = url; } + /** + * Get post's URL. + * @return the post's media URL. + */ + public String getUrl() { + return url; + } + + /** + * Check if the Media URL is correct. + * @return if the URL is correct. + */ + public boolean hasMediaUrl() { + return !"".equals(url); + } + /** * Get post's title. + * * @return post's title. */ public String getTitle() { @@ -109,30 +134,34 @@ public abstract class RedditPost { /** * Is this post in quarantine? + * * @return if the post is in quarantine. 
*/ public boolean isQuarantine() { return quarantine; } - + /** * Set quarantine state for the post. + * * @param state the state to apply. */ public void setQuarantineState(boolean state) { this.quarantine = state; - } + } /** * Get post's score. + * * @return last known post's score. */ public double getScore() { return score; } - + /** * Update post's score. + * * @param newScore the new score. */ public void updateScore(double newScore) { @@ -141,6 +170,7 @@ public abstract class RedditPost { /** * Get post's hint. + * * @return post's hint. */ public String getPostHint() { @@ -149,6 +179,7 @@ public abstract class RedditPost { /** * Is this post crosspostable? + * * @return if the post is crosspostable. */ public boolean isCrosspostable() { @@ -157,6 +188,7 @@ public abstract class RedditPost { /** * Is this post NSFW? + * * @return if the post is Not Safe For Work. */ public boolean isOver18() { @@ -165,14 +197,16 @@ public abstract class RedditPost { /** * Get post's author. + * * @return post's author. */ public String getAuthor() { return author; } - + /** * Get post's permalink. + * * @return the post's permalink. */ public String getPermalink() { @@ -181,6 +215,7 @@ public abstract class RedditPost { /** * Is this post a spoiler? + * * @return if the post is a spoiler. */ public boolean isSpoiler() { @@ -189,33 +224,37 @@ public abstract class RedditPost { /** * Get post's id. + * * @return post's id. */ public String getPostId() { return postId; } - + /** * Is this post an image? + * * @return if the post is an image. */ public abstract boolean isImage(); - + /** * Is this post a text? + * * @return if the post is a text. */ public abstract boolean isText(); - + /** * Is this post a video? + * * @return if the post is a video. */ public abstract boolean isVideo(); - - + /** * Is this post a link? + * * @return if the post is a link. 
*/ public abstract boolean isLink(); diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostImage.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostImage.java index f25baae..b939625 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostImage.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostImage.java @@ -24,8 +24,8 @@ public class RedditPostImage extends RedditPost { public RedditPostImage(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, - boolean over18, String author, String permalink, boolean spoiler) { - super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler); + boolean over18, String author, String permalink, boolean spoiler, String url) { + super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler, url); } @Override diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostLink.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostLink.java index 26827d4..5e9e2ad 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostLink.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostLink.java @@ -24,8 +24,8 @@ public class RedditPostLink extends RedditPost { public RedditPostLink(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, - boolean over18, String author, String permalink, boolean spoiler) { - super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler); + boolean over18, String author, String permalink, boolean spoiler, String url) { + super(id, title, quarantine, score, postHint, crosspostable, over18, author, 
permalink, spoiler, url); } @Override diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostText.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostText.java index 0f4855a..00731f8 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostText.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostText.java @@ -24,8 +24,8 @@ public class RedditPostText extends RedditPost { public RedditPostText(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, - boolean over18, String author, String permalink, boolean spoiler) { - super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler); + boolean over18, String author, String permalink, boolean spoiler, String url) { + super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler, url); } @Override diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostVideo.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostVideo.java index 940312f..f830c02 100644 --- a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostVideo.java +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/RedditPostVideo.java @@ -24,8 +24,8 @@ public class RedditPostVideo extends RedditPost{ public RedditPostVideo(String id, String title, boolean quarantine, double score, String postHint, boolean crosspostable, - boolean over18, String author, String permalink, boolean spoiler) { - super(id, title, quarantine, score, postHint, crosspostable, over18, author, permalink, spoiler); + boolean over18, String author, String permalink, boolean spoiler, String url) { + super(id, title, quarantine, score, postHint, crosspostable, over18, author, 
permalink, spoiler, url); } @Override diff --git a/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/SubReddit.java b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/SubReddit.java new file mode 100644 index 0000000..94ada21 --- /dev/null +++ b/twitter_techsupportgore_bot/src/twitter_techsupportgore_bot/reddit_handler/SubReddit.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2019 louis + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package twitter_techsupportgore_bot.reddit_handler; + +/** + * A subreddit. + * + * @author louis + */ +public class SubReddit { + + /** + * Subreddit's name. + */ + private final String name; + + /** + * Subreddit's URL. + */ + private final String url; + + /** + * Subreddit's JSON URL. + */ + private final String jsonURL; + + /** + * Dist limit for the JSON api call. + */ + private int limit = 25; + + /** + * Order for the JSON. + */ + private String order = "new"; + + /** + * Main constructor. + * + * @param name subreddit's name + */ + public SubReddit(String name) { + this.name = name; + this.url = "https://www.reddit.com/r/" + name + "/"; + this.jsonURL = this.url.substring(0, this.url.length()) + order + ".json"; + } + + /** + * Set subreddit dist limit for parsing JSON file. + * @param limit the limit between 1 and 100. 
+ */ + public void setLimit(int limit) { + if (limit < 1 || limit > 100) { + throw new IllegalArgumentException("Limit should be between 1 and 100"); + } else { + this.limit = limit; + } + } + + /** + * Get subreddit dist limit. + * @return the limit. + */ + public int getLimit() { + return limit; + } + + /** + * Get name. + * + * @return the name. + */ + public String getName() { + return name; + } + + /** + * Get the url. + * + * @return the subreddit's url. + */ + public String getUrl() { + return url; + } + + /** + * Get the JSON URL. + * + * @return the subreddit's JSON URL. + */ + public String getJsonURL() { + return jsonURL + "?limit=" + limit; + } +}