Force add/edit plugins to explicitly return tag lists
[canto-next.git] / plugins / reddit.py
1 # Canto Reddit Plugin
2 # by Jack Miller
3 # v1.2
4 #
5 # If this is placed in the plugins directory, it will add a new sort:
6 # reddit_score_sort, and will add "score [subreddit]" to the beginning of
7 # every relevant feed item.
8
# PREPEND_SCORE, if true, will add the score to the entry title. Note, this
# is only intended to change the displayed title, not the sort.

PREPEND_SCORE = True

# PREPEND_SUBREDDIT, if true, will add the [subreddit] to the entry title.

PREPEND_SUBREDDIT = True

# EXTRA_LOG_OUTPUT, if true, will log non-error stuff with -v (the messages
# passed to this file's debug() helper).

EXTRA_LOG_OUTPUT = False
21
22 # You shouldn't have to change anything beyond this line.
23
24 from canto_next.plugins import check_program
25
# NOTE(review): presumably restricts this plugin to the canto-daemon process,
# which would explain why the daemon-side imports below come after this call
# -- confirm against canto_next.plugins.check_program.
check_program("canto-daemon")
27
28 from canto_next.fetch import DaemonFetchThreadPlugin
29 from canto_next.feed import DaemonFeedPlugin
30 from canto_next.transform import transform_locals, CantoTransform 
31
32 import urllib.request, urllib.error, urllib.parse
33 import logging
34 import time
35 import json
36 import re
37
# Logger shared by everything in this plugin; messages are emitted under the
# "REDDIT" logger name.
log = logging.getLogger("REDDIT")
39
def debug(message):
    """Forward *message* to the plugin logger at debug level.

    Does nothing unless the EXTRA_LOG_OUTPUT setting is true.
    """
    if not EXTRA_LOG_OUTPUT:
        return
    log.debug(message)
43
# Fields copied from a matching reddit JSON post into the entry's
# "reddit-json" data; everything else in the post is discarded.
keep_attrs = [ "score", "subreddit" ]
45
class RedditFetchJSON(DaemonFetchThreadPlugin):
    """Fetch-thread plugin that attaches reddit listing data to feed entries.

    For feeds whose URL contains "reddit.com", the JSON version of the feed
    is downloaded and, for every entry whose post ID can be found in that
    listing, the fields named in keep_attrs are stored on the entry under
    the "reddit-json" key as {"data": {...}}.
    """

    def __init__(self, fetch_thread):
        """Register the hook and compile the post-ID patterns.

        fetch_thread is accepted for the plugin interface but not used.
        """
        self.plugin_attrs = {
            "fetch_redditJSON": self.fetch_redditJSON,
        }

        # The post ID is the path component following "comments/" or "tb/".
        self.comment_id_regex = re.compile(".*comments/([^/]*)/.*")
        self.tb_id_regex = re.compile(".*tb/([^/]*)")

    def fetch_redditJSON(self, **kwargs):
        """Annotate kwargs["newcontent"]["entries"] with reddit JSON data.

        Reads kwargs["feed"].URL and kwargs["newcontent"]["entries"].
        Entries are modified in place; nothing is returned. Download or
        parse failures are logged and leave the entries untouched.
        """
        feed_url = kwargs["feed"].URL
        if "reddit.com" not in feed_url:
            return

        # Get the feed's JSON. The response is closed by the with-block even
        # if reading or decoding fails.
        try:
            json_url = feed_url.replace("/.rss", "/.json")
            req = urllib.request.Request(
                json_url, headers={"User-Agent": "Canto-Reddit-Plugin"})
            with urllib.request.urlopen(req, None, 10) as response:
                reddit_json = json.loads(response.read().decode())
        except Exception as e:
            log.error("Error fetching Reddit JSON: %s" % e)
            return

        # A valid listing is {"data": {"children": [...]}}; anything else
        # (e.g. an error document) is reported instead of raising out of
        # the hook.
        children = None
        if isinstance(reddit_json, dict):
            listing = reddit_json.get("data")
            if isinstance(listing, dict):
                children = listing.get("children")
        if not isinstance(children, list):
            log.error("Unexpected Reddit JSON layout from %s" % json_url)
            return

        # Index posts by their full name ("t3_<id>") once, instead of
        # rescanning the listing for every entry. setdefault keeps the first
        # occurrence, matching a first-match linear scan.
        posts = {}
        for child in children:
            post = child.get("data") if isinstance(child, dict) else None
            if not isinstance(post, dict):
                continue
            name = post.get("name")
            if isinstance(name, str):
                posts.setdefault(name, post)

        for entry in kwargs["newcontent"]["entries"]:
            if "link" not in entry:
                debug("Entry has no link, skipping")
                continue
            link = entry["link"]

            m = self.comment_id_regex.match(link)
            if not m:
                m = self.tb_id_regex.match(link)
            if not m:
                debug("Couldn't find ID in %s ?!" % link)
                continue
            name = "t3_" + m.group(1)

            post = posts.get(name)
            if post is None:
                debug("Couldn't find m= %s" % name)
                continue

            debug("Found m=%s" % name)
            entry["reddit-json"] = {
                "data": {
                    attr: post[attr] for attr in keep_attrs if attr in post
                },
            }
91
class RedditScoreSort(CantoTransform):
    """Sort transform that orders items by their "reddit-score" attribute.

    Items with a usable integer score come first, highest score first;
    items without one follow in their original relative order.
    """

    def __init__(self):
        pass

    def needed_attributes(self, tag):
        """Return the attribute names transform() reads for each item."""
        return ["reddit-score"]

    def transform(self, items, attrs):
        """Return items reordered by descending reddit score.

        items -- iterable of item identifiers.
        attrs -- mapping of item -> attribute dict; a non-int "reddit-score"
                 that can be converted is replaced by its int value in place.

        Items whose score is missing or cannot be converted to int are
        appended after the scored ones. Equal scores are ordered by
        comparing the items themselves (descending).
        """
        scored = []
        unscored = []

        for item in items:
            item_attrs = attrs[item]
            if "reddit-score" not in item_attrs:
                unscored.append(item)
                continue

            # For some reason, reddit-score has been parsed as a string
            # some times. Attempt to coerce.
            score = item_attrs["reddit-score"]
            if not isinstance(score, int):
                try:
                    item_attrs["reddit-score"] = int(score)
                except (TypeError, ValueError, OverflowError):
                    # Only conversion failures mean "no score"; anything
                    # else (e.g. KeyboardInterrupt) must propagate.
                    unscored.append(item)
                    continue

            scored.append(item)

        # Ascending sort on (score, item) followed by a reverse, so ties
        # come out in descending item order.
        ranked = sorted((attrs[x]["reddit-score"], x) for x in scored)
        ranked.reverse()

        return [x for (_, x) in ranked] + unscored
128
class RedditAnnotate(DaemonFeedPlugin):
    """Feed plugin that turns stored "reddit-json" data into entry fields.

    Sets "subreddit" and "reddit-score" on each entry that carries
    "reddit-json" data and, depending on PREPEND_SUBREDDIT / PREPEND_SCORE,
    prefixes the entry title with "[subreddit] " and "<score> ".
    """

    def __init__(self, daemon_feed):
        """Register the edit hook; daemon_feed is accepted but not used."""
        self.plugin_attrs = {
            "edit_reddit": self.edit_reddit,
        }

    def edit_reddit(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items):
        """Annotate newcontent["entries"] in place.

        feed is not used. tags_to_add, tags_to_remove and remove_items are
        passed through untouched and returned as a tuple in that order.
        """
        for entry in newcontent["entries"]:
            if "reddit-json" not in entry:
                debug("NO JSON, bailing")
                continue

            rj = entry["reddit-json"]
            if not rj:
                debug("JSON empty, bailing")
                continue

            # The fetch side only copies fields that were present, so
            # neither "subreddit" nor "score" is guaranteed to be here.
            data = rj.get("data") or {}

            if "subreddit" not in entry and "subreddit" in data:
                entry["subreddit"] = data["subreddit"]
                if PREPEND_SUBREDDIT:
                    entry["title"] =\
                            "[" + entry["subreddit"] + "] " + entry["title"]

            if "score" not in data:
                debug("No score in JSON, skipping score")
                continue

            try:
                score = int(data["score"])
            except (TypeError, ValueError, OverflowError):
                debug("Unusable score in JSON, skipping score")
                continue

            # Always record the score so reddit_score_sort works even when
            # the title prefix is turned off; PREPEND_SCORE only controls
            # the title.
            had_score = "reddit-score" in entry
            entry["reddit-score"] = score

            if PREPEND_SCORE:
                if had_score:
                    # Drop the prefix added on a previous pass ("-?" covers
                    # a negative score, which "%d " renders as "-5 ").
                    entry["title"] = re.sub(r"^-?\d+ ", "", entry["title"])

                entry["title"] = ("%d " % score) + entry["title"]

        return (tags_to_add, tags_to_remove, remove_items)
163
# Make the sort available under the name "reddit_score_sort".
transform_locals["reddit_score_sort"] = RedditScoreSort()