7c507a2bf153ac03687a78345b0fceb0c4c15462
[canto-next.git] / plugins / sync-inoreader.py
1 # Canto Inoreader Plugin
2 # by Jack Miller
3 # v0.2
4
5 # DEPENDENCIES
6
7 # This plugin requires the 'requests' module, which can usually be found in
8 # your distro's package manager as python3-requests (or python-requests on
9 # Arch).
10
11 # IMPORTANT NOTES
12
13 # - When this plugin is enabled, canto will synchronize your subscribed feeds
14 # with Inoreader. If you've been using canto, you should export your feeds
15 # (canto-remote export > feeds.opml) and import them into Inoreader if you
16 # don't want to lose your feeds because Inoreader's info is assumed to be more
17 # correct than ours.
18 #
# - Feed subscriptions are only synchronized *from Inoreader* on startup, so if
# you add / remove feeds with Inoreader, you will have to restart the daemon to
# pick up the changes. Adding or removing feeds with canto works properly all
# the time.
23 #
24 # - You should probably only enable this if you have disabled other sync-*
25 # plugins (like sync-rsync). They won't break, but having multiple
26 # synchronization points is pointless.
27 #
28 # - You must have a standard Inoreader account, not an OAuth (Google/Facebook
29 # login).
30
31 # CONFIGURATION
32
33 # Inoreader credentials
34
35 EMAIL="somebody@somewhere.com"
36 PASSWORD="passw0rd"
37
# You don't *have* to change these, but the API is rate limited. So if you want
# to avoid rate limit issues, register your own application under Preferences ->
# Developer options on the Inoreader site and replace these.
41
42 APP_ID="1000001299"
43 APP_KEY="i0UOUtLQjj2WTre8WA3a9GWt_cgDhpkO"
44
45 BASE_URL="https://www.inoreader.com/reader/"
46
47 # === You shouldn't have to change anything past this line. ===
48
49 from canto_next.fetch import DaemonFetchThreadPlugin
50 from canto_next.feed import DaemonFeedPlugin, allfeeds
51 from canto_next.hooks import call_hook, on_hook
52 from canto_next.config import config
53
54 from urllib.parse import urlencode, quote
55 import traceback
56 import requests
57 import logging
58 import time
59 import json
60
# Logger used by every function in this plugin.
log = logging.getLogger("SYNC-INOREADER")

# Values sent along with every Inoreader request: a client identifier plus
# the application credentials configured at the top of this file.
extra_headers = {
    "User-Agent": "Canto/0.9.0 + http://codezen.org/canto-ng",
    "AppKey": APP_KEY,
    "AppID": APP_ID,
}
68
def ino_get_auth():
    """Log in with EMAIL / PASSWORD and return the Inoreader auth token.

    Raises Exception when the server does not answer with status 200, or
    when the response body contains no line starting with "Auth=".
    """
    credentials = dict(extra_headers, Email=EMAIL, Passwd=PASSWORD)

    # requests.get()'s second positional parameter is `params`, so these
    # values are sent in the query string, not as HTTP headers.
    resp = requests.get("https://www.inoreader.com/accounts/ClientLogin",
                        params=credentials)
    if resp.status_code != 200:
        raise Exception("Failed to authorize: [%s] %s" % (resp.status_code, resp.text))

    auth_line = next(
        (ln for ln in resp.text.splitlines() if ln.startswith("Auth=")),
        None,
    )
    if auth_line is None:
        raise Exception("Failed to find Auth= in auth response")

    # Drop the leading "Auth=" marker.
    return auth_line[5:]


# Authenticate once, when the plugin module is loaded; inoreader_req() reuses
# this token for every later call.
authorization = ino_get_auth()

log.debug("authorization: %s", authorization)
87
88 # XXX : Needs to handle errors / reauth
89
def inoreader_req(path, query=None):
    """Issue an authorized GET against the Inoreader API.

    path  -- API path relative to BASE_URL; it may already carry its own
             query string.
    query -- optional dict of extra query parameters.

    Returns the requests response object. Non-200 responses are only
    logged at debug level, not raised, so callers must cope with error
    payloads themselves.
    """
    # A None default avoids sharing one mutable dict between all calls.
    if query is None:
        query = {}

    headers = extra_headers.copy()
    headers["Authorization"] = "GoogleLogin auth=" + authorization

    r = requests.get(BASE_URL + path, params=query, headers=headers)

    if r.status_code != 200:
        log.debug("STATUS %s", r.status_code)
        log.debug(r.headers)
        log.debug(r.text)

    return r
102
def full_ino_tag_suffix(tag):
    """Return the trailing part of the Inoreader tag name for a canto tag.

    "read" and "starred" map to Inoreader states; everything else is
    treated as a user label.
    """
    is_state = tag in ("read", "starred")
    prefix = "/state/com.google/" if is_state else "/label/"
    return prefix + tag
107
def full_ino_tag(tag):
    """Return the full Inoreader tag name ("user/-/...") for a canto tag."""
    suffix = full_ino_tag_suffix(tag)
    return "user/-" + suffix
110
def strip_ino_tag(tag):
    """Reduce a full Inoreader category name to its bare tag name.

    "user/<id>/label/<name>" yields <name>; "user/<id>/state/<ns>/<name>"
    yields <name> with the namespace dropped. A category with fewer path
    components than that raises IndexError.
    """
    parts = tag.split("/", 3)
    kind, remainder = parts[2], parts[3]

    if kind != "state":
        return remainder

    # States carry an extra namespace component (e.g. "com.google/read").
    return remainder.split("/", 1)[1]
116
def has_ino_tag(item, tag):
    """Tell whether item's Inoreader categories include the given canto tag.

    Items that carry no "inoreader_categories" key never match.
    """
    if "inoreader_categories" not in item:
        return False

    wanted = full_ino_tag_suffix(tag)
    return any(cat.endswith(wanted) for cat in item["inoreader_categories"])
126
def inoreader_add_tag(ino_id, tag):
    """Ask Inoreader to attach the given canto tag to item ino_id."""
    path = "api/0/edit-tag?a=%s&i=%s" % (quote(full_ino_tag(tag)), quote(ino_id))
    inoreader_req(path)
131
def inoreader_remove_tag(ino_id, tag):
    """Ask Inoreader to detach the given canto tag from item ino_id."""
    path = "api/0/edit-tag?r=%s&i=%s" % (quote(full_ino_tag(tag)), quote(ino_id))
    inoreader_req(path)
136
def inoreader_get_subs():
    """Return the "subscriptions" list from Inoreader's subscription listing."""
    response = inoreader_req("api/0/subscription/list")
    payload = response.json()
    return payload["subscriptions"]
139
def inoreader_add_sub(feed_url, title):
    """Subscribe the Inoreader account to feed_url under the given title."""
    stream = "feed/" + feed_url
    inoreader_req(
        "api/0/subscription/edit",
        dict(ac="subscribe", s=stream, t=title),
    )
148
def inoreader_del_sub(feed_url):
    """Unsubscribe the Inoreader account from feed_url."""
    stream = "feed/" + feed_url
    inoreader_req(
        "api/0/subscription/edit",
        dict(ac="unsubscribe", s=stream),
    )
155
156
157 # Given a change set, and the current attributes of a canto item, tell
158 # Inoreader about it.
159
def sync_state_to(changes, attrs, add_only = False):
    """Push a canto change set for one item to Inoreader.

    changes  -- dict that may hold "canto-state" and/or "canto-tags" with
                the item's new values.
    attrs    -- the item's attributes; "inoreader_id" and
                "inoreader_categories" are read from it.
    add_only -- when true, only add state / tags on the Inoreader side and
                never remove anything.
    """
    if "canto-state" in changes:
        if "read" in changes["canto-state"]:
            if not has_ino_tag(attrs, "read"):
                inoreader_add_tag(attrs["inoreader_id"], "read")
        elif not add_only:
            if has_ino_tag(attrs, "read"):
                inoreader_remove_tag(attrs["inoreader_id"], "read")

    if "canto-tags" not in changes:
        return

    new_tags = changes["canto-tags"]

    for tag in new_tags:
        tag = tag.split(":", 1)[1] # strip user: or category: prefix
        if not has_ino_tag(attrs, tag):
            inoreader_add_tag(attrs["inoreader_id"], tag)

    if add_only:
        return

    for category in attrs["inoreader_categories"]:
        # The read state is owned by the "canto-state" branch above. It is
        # never a canto tag, so without this guard every tag change would
        # mark the item unread on Inoreader.
        if category.endswith("/state/com.google/read"):
            continue

        tag = strip_ino_tag(category)

        # `changes` is already this item's change set; the old lookup via
        # an undefined `item_id` raised NameError here.
        if "user:" + tag not in new_tags:
            inoreader_remove_tag(attrs["inoreader_id"], tag)
182
183 # Inoreader communicates with canto through this fetch thread plugin
184
185 # After we've grabbed the feed, and used feedparser on it, we run
186 # fetch_inoreader_sync which will add inoreader information.
187
class CantoFetchInoReader(DaemonFetchThreadPlugin):
    """Fetch-thread plugin that annotates parsed entries with Inoreader data."""

    def __init__(self, fetch_thread):
        self.plugin_attrs = { "fetch_inoreader_sync" : self.fetch_inoreader_sync }
        self.fetch_thread = fetch_thread

    def fetch_inoreader_sync(self, **kwargs):
        """Attach "inoreader_id" / "inoreader_categories" to matching entries.

        Reads kwargs["feed"] (for its URL) and kwargs["newcontent"] (the
        parsed feed). Entries are paired with Inoreader items by comparing
        the item's first canonical href with the entry's "link". Failures
        while talking to Inoreader are logged and leave the entries
        untouched.
        """
        # Grab these from the parent object

        feed = kwargs["feed"]
        newcontent = kwargs["newcontent"]

        # An empty `safe` set means even "/" in the feed URL is escaped.
        stream_id = quote("feed/" + feed.URL, safe="")

        query = { "n" : 1000 }

        # Collect all of the items

        ino_entries = []
        content_path = "api/0/stream/contents/" + stream_id

        try:
            r = inoreader_req(content_path, query).json()
            ino_entries.extend(r["items"])

            #while "continuation" in r:
            #    query["c"] = r["continuation"]
            #    r = inoreader_req(content_path, query).json()
            #    ino_entries.extend(r["items"])
        except Exception:
            # format_exc() takes no exception argument (its first parameter
            # is `limit`); passing one made this handler raise TypeError.
            log.debug("EXCEPT: %s" % traceback.format_exc())

        if not ino_entries:
            return

        # Index canto entries by link so each Inoreader item is matched with
        # one lookup instead of a scan over every entry.
        by_link = {}
        for canto_entry in newcontent["entries"]:
            link = canto_entry.get("link")
            if link is not None:
                by_link.setdefault(link, []).append(canto_entry)

        for ino_entry in ino_entries:
            try:
                href = ino_entry["canonical"][0]["href"]
            except (KeyError, IndexError):
                # No canonical link to match on; skip this item.
                continue

            for canto_entry in by_link.get(href, ()):
                canto_entry["inoreader_id"] = ino_entry["id"]
                canto_entry["inoreader_categories"] = ino_entry["categories"]
227
228
229 # Since we've included the Inoreader information, wait until we've done most of
230 # feed.index to edit the internal state.
231
232 # We do this separately because it's not until after feed.index() that we have
233 # existing information (canto-state / canto-tags) included in the feedparser
234 # data.
235
class CantoFeedInoReader(DaemonFeedPlugin):
    """Feed plugin that folds Inoreader state / tags into canto's own."""

    def __init__(self, feed):
        self.plugin_attrs = { "edit_inoreader_sync" : self.edit_inoreader_sync }
        self.feed = feed

    def _list_add(self, item, attr, new):
        """Append new to the list item[attr], creating it and avoiding dupes."""
        if attr in item:
            if new not in item[attr]:
                item[attr].append(new)
        else:
            item[attr] = [ new ]

    def add_utag(self, item, tags_to_add, tag):
        """Record "user:<tag>" on item and queue it in tags_to_add."""
        user_tag = "user:" + tag
        self._list_add(item, "canto-tags", user_tag)
        tags_to_add.append((self.feed._cacheitem(item)["id"], user_tag))

    def add_state(self, item, state):
        """Record state in item's "canto-state" list."""
        self._list_add(item, "canto-state", state)

    @staticmethod
    def _has_list(entry, key):
        """True when entry[key] exists and is exactly a list."""
        return key in entry and type(entry[key]) == list

    def _import_category(self, entry, tags_to_add, category):
        """Translate one Inoreader category into canto state or a user tag."""
        if category.endswith("/state/com.google/read"):
            self.add_state(entry, "read")
            return

        cat = category.split("/", 3)
        if len(cat) < 4:
            log.debug("Weird category? %s" % cat)
            return

        kind, value = cat[2], cat[3]
        if kind == "state":
            # Of the states, only "starred" is mirrored as a user tag.
            if value == "com.google/starred":
                self.add_utag(entry, tags_to_add, "starred")
        elif kind == "label":
            self.add_utag(entry, tags_to_add, value)

    def edit_inoreader_sync(self, **kwargs):
        """Reconcile each entry's canto state / tags with its Inoreader data.

        Reads kwargs["newcontent"], and appends (item id, tag) pairs to
        kwargs["tags_to_add"] and kwargs["tags_to_remove"].
        """
        newcontent = kwargs["newcontent"]
        tags_to_add = kwargs["tags_to_add"]
        tags_to_remove = kwargs["tags_to_remove"]

        for entry in newcontent["entries"]:
            # Entries that were never paired with an Inoreader item are
            # left alone.
            if "inoreader_id" not in entry:
                continue

            for category in entry["inoreader_categories"]:
                self._import_category(entry, tags_to_add, category)

            # First pairing of this entry with Inoreader data: canto's state
            # is considered better, so push it to Inoreader (add only) and
            # skip the removal logic below.
            if "canto-inoreader-sync" not in entry:
                sync_state_to(entry, entry, True)
                entry["canto-inoreader-sync"] = True
                continue

            if not self._has_list(entry, "canto-state"):
                continue

            state = entry["canto-state"]
            if "read" in state and not has_ino_tag(entry, "read"):
                state.remove("read")

            if not self._has_list(entry, "canto-tags"):
                continue

            # Walk a copy, since tags are removed from the list in the loop.
            for tag in list(entry["canto-tags"]):
                bare = tag.split(":", 1)[1]
                if has_ino_tag(entry, bare):
                    continue
                entry["canto-tags"].remove(tag)
                tags_to_remove.append((self.feed._cacheitem(entry)["id"], tag))
303
# For canto communicating to Inoreader, we tap into the relevant hooks to
# pick up state / tag changes, and convert them into Inoreader API calls.
306
def post_setattributes(socket, args):
    """Hook: forward attribute changes made in canto to Inoreader.

    args maps JSON-encoded item ids to that item's changed attributes.
    Items without a proper Inoreader id are skipped.
    """
    ino_prefix = "tag:google.com,2005:reader/item/"

    for item_id in args:
        feed_url = json.loads(item_id)["URL"]
        feed = allfeeds.get_feed(feed_url)

        wanted = ["inoreader_id", "inoreader_categories", "canto-state", "canto-tags"]
        attrs = feed.get_attributes([item_id], { item_id : wanted })[item_id]

        # get_attributes substitutes "" for missing values, so an item that
        # was never paired with Inoreader fails this prefix check.
        if attrs["inoreader_id"].startswith(ino_prefix):
            sync_state_to(args[item_id], attrs)

on_hook("daemon_post_setattributes", post_setattributes)
327
def post_setconfigs(socket, args):
    """Hook: subscribe on Inoreader to every feed listed in args["feeds"]."""
    if "feeds" not in args:
        return

    for feed in args["feeds"]:
        inoreader_add_sub(feed["url"], feed["name"])

on_hook("daemon_post_setconfigs", post_setconfigs)
334
def post_delconfigs(socket, args):
    """Hook: unsubscribe on Inoreader from every feed listed in args["feeds"]."""
    if "feeds" not in args:
        return

    for feed in args["feeds"]:
        inoreader_del_sub(feed["url"])

on_hook("daemon_post_delconfigs", post_delconfigs)
341
342 # Do the initial feed synchronization. This only occurs once per run, and
343 # assumes Inoreader knows everything.
344
def on_daemon_serving():
    """Hook: make canto's configured feeds match the Inoreader subscriptions.

    Feeds known to Inoreader but missing from the config are added through
    the "daemon_set_configs" hook; configured feeds that Inoreader does not
    list are dropped through "daemon_del_configs".
    """
    log.debug("Synchronizing subscriptions.")
    ino_subs = inoreader_get_subs()

    for sub in ino_subs:
        url = sub["url"]
        name = sub["title"]

        # The config is re-read for every subscription, so a feed added a
        # moment ago is seen if the hook updates the config right away.
        if any(c_feed["url"] == url for c_feed in config.json["feeds"]):
            continue

        log.debug("New feed: %s" % url)
        call_hook("daemon_set_configs", [ None, { "feeds" : [ { "name" : name, "url" : url } ] } ])

    # ino_subs does not change below, so one set gives O(1) membership tests.
    ino_urls = { sub["url"] for sub in ino_subs }

    # Iterate over a snapshot of the feed list.
    # NOTE(review): the daemon_del_configs handlers presumably edit
    # config.json["feeds"]; deleting from the list being iterated would
    # skip entries. Confirm against the daemon.
    for c_feed in list(config.json["feeds"]):
        url = c_feed["url"]
        if url in ino_urls:
            continue

        log.debug("Old feed: %s" % url)
        call_hook("daemon_del_configs", [ None, { "feeds" : [ c_feed ] } ] )

on_hook("daemon_serving", on_daemon_serving)