14c6167550c14dd88164aedc33ce4846ec81e93f
[canto-next.git] / plugins / sync-inoreader.py
1 # Canto Inoreader Plugin
2 # by Jack Miller
3 # v0.4
4
5 # DEPENDENCIES
6
7 # This plugin requires the 'requests' module, which can usually be found in
8 # your distro's package manager as python3-requests (or python-requests on
9 # Arch).
10
11 # IMPORTANT NOTES
12
13 # - When this plugin is enabled, canto will synchronize your subscribed feeds
14 # with Inoreader. If you've been using canto, you should export your feeds
15 # (canto-remote export > feeds.opml) and import them into Inoreader if you
16 # don't want to lose your feeds because Inoreader's info is assumed to be more
17 # correct than ours.
18 #
# - Feed subscriptions are only synchronized *from Inoreader* on startup, so if
# you add / remove feeds with Inoreader, you will have to restart the daemon to
# pick up the changes. Adding or removing feeds with canto works properly all
# the time.
23 #
24 # - You must have a standard Inoreader account, not an OAuth (Google/Facebook
25 # login).
26
27 # CONFIGURATION
28
# Inoreader credentials: the email address and password of a standard
# (non-OAuth) Inoreader account.

EMAIL="somebody@somewhere.com"
PASSWORD="passw0rd"

# You don't *have* to change these, but the API is rate limited. So if you want
# to avoid rate limit issues, register your own application under Preferences ->
# Developer options on the Inoreader site and replace these.

APP_ID="1000001299"
APP_KEY="i0UOUtLQjj2WTre8WA3a9GWt_cgDhpkO"

# Root URL of the Inoreader API; every request path is appended to this.

BASE_URL="https://www.inoreader.com/reader/"
42
43 # === You shouldn't have to change anything past this line. ===
44
45 from canto_next.plugins import check_program
46
47 check_program("canto-daemon")
48
49 from canto_next.fetch import DaemonFetchThreadPlugin
50 from canto_next.feed import DaemonFeedPlugin, allfeeds
51 from canto_next.hooks import call_hook, on_hook
52 from canto_next.config import config
53
54 from urllib.parse import urlencode, quote
55 import traceback
56 import requests
57 import logging
58 import time
59 import json
60
61 log = logging.getLogger("SYNC-INOREADER")
62
class InoreaderReqFailed(Exception):
    """Raised when a request to Inoreader could not be completed."""
65
class InoreaderAuthFailed(Exception):
    """Raised when Inoreader rejects or does not answer an authorization."""
68
class CantoInoreaderAPI():
    """Small client for the Inoreader HTTP API.

    Handles authorization, request retries and the batching of tag changes.
    Tag additions / removals are only queued by add_tag() / remove_tag();
    nothing is sent to Inoreader until flush_changes() is called.
    """

    # Seconds to wait for any single HTTP request.
    REQUEST_TIMEOUT = 1

    # Number of attempts inoreader_req() makes before giving up.
    MAX_TRIES = 3

    # Longest edit-tag path (prefix plus item ids) sent in one request.
    URL_LIMIT = 2048

    def __init__(self):
        self.extra_headers = {
                "User-Agent" : "Canto/0.9.0 + http://codezen.org/canto-ng",
                "AppKey" : APP_KEY,
                "AppID" : APP_ID,
        }

        # Set once the API is known to be unusable (bad credentials, rate
        # limit); inoreader_req() refuses to send anything afterwards.
        self.dead = False

        # natural tag name -> list of Inoreader item ids awaiting a flush
        self.add_tags_queued = {}
        self.del_tags_queued = {}

        # A failure here is not fatal: inoreader_req() authorizes again on
        # demand whenever self.authorization is unset.
        try:
            self.authorization = self.auth()
        except Exception as e:
            log.debug("Initial Inoreader authorization failed: %s", e)
            self.authorization = None

    def auth(self):
        """Log in with EMAIL / PASSWORD and return the authorization token.

        Raises InoreaderReqFailed if the HTTP request itself fails, and
        InoreaderAuthFailed if the response is not a 200 or carries no
        "Auth=" line.
        """
        # These are sent as URL query parameters, not HTTP headers: the
        # second positional argument of requests.get() is `params`, which is
        # what the original positional call relied on. It is spelled out
        # here so that is obvious.
        params = self.extra_headers.copy()
        params['Email'] = EMAIL
        params['Passwd'] = PASSWORD

        try:
            r = requests.get("https://www.inoreader.com/accounts/ClientLogin",
                    params=params, timeout=self.REQUEST_TIMEOUT)
        except Exception as e:
            raise InoreaderReqFailed(str(e)) from e

        if r.status_code != 200:
            raise InoreaderAuthFailed("Failed to authorize: [%s] %s" % (r.status_code, r.text))

        for line in r.text.splitlines():
            if line.startswith("Auth="):
                # NOTE(review): this writes the session token to the debug log.
                log.debug("authorization: %s", line[5:])
                return line[5:]

        raise InoreaderAuthFailed("Failed to find Auth= in auth response")

    def inoreader_req(self, path, query=None):
        """GET BASE_URL + path and return the response on HTTP 200.

        query is an optional dict of URL parameters. Up to MAX_TRIES attempts
        are made; a 401 drops the current token so the next attempt logs in
        again, and a 429 marks the API dead.

        Raises InoreaderAuthFailed if re-authorization is rejected, and
        InoreaderReqFailed if the request fails at the transport level, the
        API is dead, or every attempt returned a non-200 status.
        """
        tries = self.MAX_TRIES

        while tries and not self.dead:
            tries -= 1
            if not self.authorization:
                try:
                    self.authorization = self.auth()
                except InoreaderReqFailed as e:
                    log.debug("Auth request failed: %s", e)
                    continue
                except InoreaderAuthFailed:
                    log.error("Inoreader authorization failed, please check your credentials in sync-inoreader.py")
                    self.dead = True
                    raise

            headers = self.extra_headers.copy()
            headers["Authorization"] = "GoogleLogin auth=" + self.authorization

            # Any transport failure (timeout, refused connection, DNS, ...)
            # is reported as InoreaderReqFailed so callers only have to know
            # about this module's exceptions.
            try:
                r = requests.get(BASE_URL + path, params=query, headers=headers,
                        timeout=self.REQUEST_TIMEOUT)
            except requests.exceptions.RequestException as e:
                raise InoreaderReqFailed(str(e)) from e

            if r.status_code == 200:
                return r

            log.debug("STATUS %s", r.status_code)
            log.debug(r.headers)
            log.debug(r.text)

            # No authorization, attempt to get another code on the next try.

            if r.status_code == 401:
                self.authorization = None
            elif r.status_code == 429:
                log.error("Inoreader rate limit reached.")
                self.dead = True
            elif r.status_code == 503:
                log.error("Inoreader appears down, state may be lost")

        raise InoreaderReqFailed

    # Convert special tags into /state/com.google/tag and others into
    # /label/tag, useful when matching without knowing the user.

    def full_ino_tag_suffix(self, tag):
        """Return the Inoreader path suffix for a natural tag name."""
        if tag in ["read", "starred", "fresh"]:
            return "/state/com.google/" + tag
        return "/label/" + tag

    # Add the user/- prefix to go upstream to Inoreader.

    def full_ino_tag(self, tag):
        """Return the full "user/-/..." Inoreader tag for a natural tag name."""
        return "user/-" + self.full_ino_tag_suffix(tag)

    # Do the opposite, convert an Inoreader tag into a natural name.  (i.e.)
    # /user/whatever/state/com.google/read -> read

    def strip_ino_tag(self, tag):
        """Return the natural tag name for a full Inoreader category.

        Expects "user/<id>/state/com.google/<name>" or
        "user/<id>/label/<name>"; anything shorter raises IndexError.
        """
        tag = tag.split("/", 3)
        if tag[2] == "state":
            return tag[3].split("/", 1)[1]
        return tag[3]

    # Return whether Inoreader data includes this natural tag

    def has_tag(self, item, tag):
        """Return True if item's Inoreader categories include the tag."""
        if "canto_inoreader_categories" not in item:
            return False

        suff = self.full_ino_tag_suffix(tag)
        return any(category.endswith(suff)
                for category in item["canto_inoreader_categories"])

    def add_tag(self, item, tag):
        """Queue adding tag to item, unless Inoreader already has it."""
        ino_id = item["canto_inoreader_id"]
        if not self.has_tag(item, tag):
            self.add_tags_queued.setdefault(tag, []).append(ino_id)

    def remove_tag(self, item, tag):
        """Queue removing tag from item, if Inoreader currently has it."""
        ino_id = item["canto_inoreader_id"]
        if self.has_tag(item, tag):
            self.del_tags_queued.setdefault(tag, []).append(ino_id)

    def _urllimit(self, prefix, ino_ids):
        """Send one edit-tag request covering as many ino_ids as fit.

        ino_ids are already-encoded "&i=..." fragments. They are appended to
        prefix until the next one would push the path past URL_LIMIT. Returns
        the ids that were not sent, or [] once all of them have been.
        """
        length = len(prefix)
        count = 0

        for ino_id in ino_ids:
            # Always take at least one id per request. Otherwise an id that
            # can never fit would be handed back unsent forever and the
            # caller's "while remaining" loop would never terminate.
            if count and length + len(ino_id) > self.URL_LIMIT:
                break
            length += len(ino_id)
            count += 1

        self.inoreader_req(prefix + "".join(ino_ids[:count]))
        return ino_ids[count:]

    def flush_changes(self):
        """Send every queued tag addition and removal, then clear the queues.

        If a request raises, the exception propagates and both queues are
        left untouched.
        """
        for key, ids in self.add_tags_queued.items():
            to_add = [ "&i=" + quote(x) for x in ids ]
            while to_add:
                to_add = self._urllimit("api/0/edit-tag?a=" + quote(self.full_ino_tag(key)), to_add)

        for key, ids in self.del_tags_queued.items():
            to_del = [ "&i=" + quote(x) for x in ids ]
            while to_del:
                to_del = self._urllimit("api/0/edit-tag?r=" + quote(self.full_ino_tag(key)), to_del)

        self.add_tags_queued = {}
        self.del_tags_queued = {}

    def get_subs(self):
        """Return the list of subscriptions from Inoreader's JSON response."""
        return self.inoreader_req("api/0/subscription/list").json()["subscriptions"]

    def add_sub(self, feed_url, title):
        """Subscribe to feed_url on Inoreader under the given title."""
        query = {
            "ac" : "subscribe",
            "s" : "feed/" + feed_url,
            "t" : title
        }

        self.inoreader_req("api/0/subscription/edit", query)

    def del_sub(self, feed_url):
        """Unsubscribe from feed_url on Inoreader."""
        query = {
            "ac" : "unsubscribe",
            "s" : "feed/" + feed_url
        }

        self.inoreader_req("api/0/subscription/edit", query)
248
# Shared API client used by the hook functions and the feed plugin in this
# module. Constructing it attempts an initial authorization; a failure there
# is tolerated and authorization is retried on the next request.
api = CantoInoreaderAPI()
250
251 # Given a change set, and the current attributes of a canto item, tell
252 # Inoreader about it.
253
def sync_state_to(changes, attrs, add_only = False):
    """Queue the Inoreader tag changes implied by a canto attribute change.

    changes  -- dict of changed canto attributes; only "canto-state" and
                "canto-tags" are examined.
    attrs    -- the item's current attributes; must carry
                "canto_inoreader_id" and, when removals are processed,
                "canto_inoreader_categories".
    add_only -- when True, only additions are queued and nothing is ever
                removed from Inoreader.

    Changes are only queued on the shared api object; the caller is
    responsible for calling api.flush_changes().
    """
    if "canto-state" in changes:
        if "read" in changes["canto-state"]:
            api.add_tag(attrs, "read")
        elif not add_only:
            # remove_tag() only queues anything if Inoreader has the tag.
            api.remove_tag(attrs, "read")

    if "canto-tags" not in changes:
        return

    for tag in changes["canto-tags"]:
        tag = tag.split(":", 1)[1] # strip user: or category: prefix
        # add_tag() is a no-op when Inoreader already has the tag.
        api.add_tag(attrs, tag)

    if add_only:
        return

    # Anything Inoreader has that is no longer a canto user tag gets removed.
    for category in attrs["canto_inoreader_categories"]:
        tag = api.strip_ino_tag(category)

        # The read state is driven by "canto-state" above, never by
        # "canto-tags"; without this it would be cleared on every tag edit.
        if tag == "read":
            continue

        if "user:" + tag not in changes["canto-tags"]:
            api.remove_tag(attrs, tag)
275
class CantoFeedInoReader(DaemonFeedPlugin):
    """Feed plugin that merges Inoreader items and state into a canto feed.

    additems_inoreader() fetches the feed's items from Inoreader and inserts
    the ones canto does not have; edit_inoreader_sync() then pairs canto
    items with that data and reconciles read state and tags.
    """

    def __init__(self, feed):
        self.plugin_attrs = { "edit_inoreader_sync" : self.edit_inoreader_sync,
                "additems_inoreader" : self.additems_inoreader }
        self.feed = feed

        # Items fetched by additems_inoreader(), consumed and then dropped
        # by edit_inoreader_sync().
        self.ino_data = None

    def _list_add(self, item, attr, new):
        """Append new to the list item[attr], creating it, without dupes."""
        if attr not in item:
            item[attr] = [ new ]
        elif new not in item[attr]:
            item[attr].append(new)

    def add_utag(self, item, tags_to_add, tag):
        """Give item the user tag and record it in tags_to_add."""
        self._list_add(item, "canto-tags", "user:" + tag)
        tags_to_add.append((item, "user:" + tag))

    def add_state(self, item, state):
        """Add state to the item's "canto-state" list."""
        self._list_add(item, "canto-state", state)

    def additems_inoreader(self, **kwargs):
        """Fetch this feed's items from Inoreader and merge them in.

        Reads "feed", "newcontent", "tags_to_add" and "remove_items" from
        kwargs; the three containers are modified in place. Returns early,
        leaving them untouched, if the Inoreader request fails.
        """
        feed = kwargs["feed"]
        newcontent = kwargs["newcontent"]
        tags_to_add = kwargs["tags_to_add"]
        remove_items = kwargs["remove_items"]

        # Encode every reserved character, "/" included.
        stream_id = quote("feed/" + feed.URL, safe="")

        query = { "n" : 1000 }

        # Collect all of the items

        self.ino_data = []

        content_path = "api/0/stream/contents/" + stream_id

        try:
            r = api.inoreader_req(content_path, query).json()
            self.ino_data.extend(r["items"])
        except (InoreaderAuthFailed, InoreaderReqFailed):
            return
        except Exception:
            log.debug("EXCEPT: %s", traceback.format_exc())

        # Find items that were inserted last time, and remove them, potentially
        # adding them to our fresh Inoreader data.

        # This keeps us from getting dupes when Inoreader finds an item, we
        # insert it, and then a real copy comes to canto but canto doesn't
        # detect the dupe since the ids are different.

        for canto_entry in newcontent["entries"][:]:
            if "canto-from-inoreader" not in canto_entry:
                continue

            remove_items.append(canto_entry)

            # Slice-assign so the caller's list is filtered in place. Simply
            # rebinding the local name would leave the caller's list
            # untouched and send every later append to a throwaway copy.
            tags_to_add[:] = [ x for x in tags_to_add if x[0] != canto_entry ]

            newcontent["entries"].remove(canto_entry)

            if not any(canto_entry["id"] == ino_entry["id"]
                    for ino_entry in self.ino_data):
                self.ino_data.append(canto_entry)

        # Now insert (or re-insert) items that aren't already in our data.

        # NOTE: It's okay if re-inserted items are also in remove_ids, since
        # that's processed first, and will be cancelled out by adding the tags
        # afterwards.

        for ino_entry in self.ino_data:
            for canto_entry in newcontent["entries"]:
                if ino_entry["canonical"][0]["href"] != canto_entry["link"]:
                    continue
                if ino_entry["id"] == canto_entry["id"]:
                    canto_entry["canto-from-inoreader"] = True
                break
            else:
                if "canto-from-inoreader" not in ino_entry:
                    # feedparser compatibility
                    ino_entry["summary"] = ino_entry["summary"]["content"]
                    ino_entry["link"] = ino_entry["canonical"][0]["href"]

                    # mark this item as from inoreader (missing from feed)
                    ino_entry["canto-from-inoreader"] = True

                newcontent["entries"].append(ino_entry)
                tags_to_add.append((ino_entry, "maintag:" + feed.name ))

    def edit_inoreader_sync(self, **kwargs):
        """Pair canto items with Inoreader data and reconcile their state.

        Reads "newcontent", "tags_to_add" and "tags_to_remove" from kwargs
        and modifies them in place, then flushes the queued tag changes to
        Inoreader.
        """
        newcontent = kwargs["newcontent"]
        tags_to_add = kwargs["tags_to_add"]
        tags_to_remove = kwargs["tags_to_remove"]

        # Add inoreader_id/categories information to the items

        # This is very similar to the loop in additems_inoreader, but needs to
        # be separate in case other plugins add items that inoreader might
        # track.

        # ino_data is still None if no fetch has run for this feed.
        for ino_entry in self.ino_data or []:
            for canto_entry in newcontent["entries"]:
                if ino_entry["canonical"][0]["href"] != canto_entry["link"]:
                    continue
                canto_entry["canto_inoreader_id"] = ino_entry["id"]
                canto_entry["canto_inoreader_categories"] = ino_entry["categories"]
                break

        # Drop the data.
        self.ino_data = None

        for entry in newcontent["entries"]:
            # If we didn't get an id for this item, skip it

            if "canto_inoreader_id" not in entry:
                continue

            for category in entry["canto_inoreader_categories"]:
                if category.endswith("/state/com.google/read"):
                    self.add_state(entry, "read")
                    continue

                cat = category.split("/", 3)
                if len(cat) < 4:
                    log.debug("Weird category? %s", cat)
                    continue

                if cat[2] == "state":
                    if cat[3] == "com.google/starred":
                        self.add_utag(entry, tags_to_add, "starred")
                elif cat[2] == "label":
                    self.add_utag(entry, tags_to_add, cat[3])

            # If this is the first time we've paired an item up with its
            # Inoreader data, our state is better, so sync it to Inoreader, and
            # then skip the remainder of the logic to remove canto state/tags

            if "canto-inoreader-sync" not in entry:
                sync_state_to(entry, entry, True)
                entry["canto-inoreader-sync"] = True
                continue

            if not isinstance(entry.get("canto-state"), list):
                continue

            # It appears that if an item is "fresh" it will resist all attempts
            # to set it as read?

            if "read" in entry["canto-state"] and not\
                    (api.has_tag(entry, "read") or api.has_tag(entry, "fresh")):
                log.debug("Marking unread from Inoreader")
                entry["canto-state"].remove("read")

            if not isinstance(entry.get("canto-tags"), list):
                continue

            for tag in entry["canto-tags"][:]:
                if not api.has_tag(entry, tag.split(":", 1)[1]):
                    entry["canto-tags"].remove(tag)
                    tags_to_remove.append((entry, tag))

        api.flush_changes()
442
# For canto communicating to Inoreader, we tap into the relevant hooks to
# pick up state / tag changes, and convert that into Inoreader API calls.
445
def post_setattributes(socket, args):
    """Hook: push attribute changes made in canto up to Inoreader.

    args maps JSON-encoded item ids to the attributes that were changed.
    Items without a usable Inoreader id are skipped; everything queued is
    flushed once at the end.
    """
    wanted = ("canto_inoreader_id", "canto_inoreader_categories", "canto-state", "canto-tags")

    for item_id, changes in args.items():
        feed_url = json.loads(item_id)["URL"]
        feed = allfeeds.get_feed(feed_url)

        current = feed.get_attributes([item_id], { item_id : list(wanted) })[item_id]

        # If the canto_inoreader_id isn't right (likely empty since
        # get_attributes will sub in "") then skip synchronizing this item.

        if current["canto_inoreader_id"].startswith("tag:google.com,2005:reader/item/"):
            sync_state_to(changes, current)

    api.flush_changes()

on_hook("daemon_post_setattributes", post_setattributes)
468
def post_setconfigs(socket, args):
    """Hook: subscribe on Inoreader to each feed listed in args["feeds"]."""
    if "feeds" not in args:
        return

    for new_feed in args["feeds"]:
        api.add_sub(new_feed["url"], new_feed["name"])

on_hook("daemon_post_setconfigs", post_setconfigs)
475
def post_delconfigs(socket, args):
    """Hook: unsubscribe on Inoreader from each feed listed in args["feeds"]."""
    if "feeds" not in args:
        return

    for old_feed in args["feeds"]:
        api.del_sub(old_feed["url"])

on_hook("daemon_post_delconfigs", post_delconfigs)
482
483 # Do the initial feed synchronization. This only occurs once per run, and
484 # assumes Inoreader knows everything.
485
def on_daemon_serving():
    """Hook: make canto's feed list match the Inoreader subscriptions.

    Local feeds whose URL Inoreader does not list are deleted, and Inoreader
    subscriptions that canto lacks are added, unless a local feed already
    uses the same name.
    """
    log.debug("Synchronizing subscriptions.")
    ino_subs = api.get_subs()

    # Local feeds that Inoreader doesn't know about go away.
    for c_feed in config.json["feeds"]:
        url = c_feed["url"]
        if any(sub["url"] == url for sub in ino_subs):
            continue

        log.debug("Old feed: %s", url)
        call_hook("daemon_del_configs", [ None, { "feeds" : [ c_feed ] } ] )

    # Inoreader subscriptions that canto doesn't have get added.
    for sub in ino_subs:
        url = sub["url"]
        name = sub["title"]

        # First local feed sharing either the URL or the name, if any.
        clash = next((c_feed for c_feed in config.json["feeds"]
                if c_feed["url"] == url or c_feed["name"] == name), None)

        if clash is None:
            log.debug("New feed: %s", url)
            call_hook("daemon_set_configs", [ None, { "feeds" : [ { "name" : name, "url" : url } ] } ])
        elif clash["url"] != url:
            log.info("Found feed with same name, but not URL? Skipping.")


on_hook("daemon_serving", on_daemon_serving)