sync-inoreader: quiet debug, fix exception return
[canto-next.git] / plugins / sync-inoreader.py
1 # Canto Inoreader Plugin
2 # by Jack Miller
3 # v0.4
4
5 # DEPENDENCIES
6
7 # This plugin requires the 'requests' module, which can usually be found in
8 # your distro's package manager as python3-requests (or python-requests on
9 # Arch).
10
11 # IMPORTANT NOTES
12
13 # - When this plugin is enabled, canto will synchronize your subscribed feeds
14 # with Inoreader. If you've been using canto, you should export your feeds
15 # (canto-remote export > feeds.opml) and import them into Inoreader if you
16 # don't want to lose your feeds because Inoreader's info is assumed to be more
17 # correct than ours.
18 #
19 # - Feed subscriptions are only synchronized *from Inoreader* on startup, so if
20 # you add / remove feeds with Inoreader, you will have to restart the daemon to
21 # pick up the changes. Adding or removing feeds with canto works properly all
22 # the time.
23 #
24 # - You must have a standard Inoreader account, not an OAuth (Google/Facebook)
25 # login.
26
27 # CONFIGURATION
28
# Inoreader credentials

# NOTE: these are stored here in plain text. auth() sends them to Inoreader's
# ClientLogin endpoint as the 'Email' and 'Passwd' values.

EMAIL="somebody@somewhere.com"
PASSWORD="passw0rd"

# You don't *have* to change these, but the API is rate limited. So if you want
# to avoid rate limit issues, register your own application under Preferences ->
# Developer options on the Inoreader site and replace these.

APP_ID="1000001299"
APP_KEY="i0UOUtLQjj2WTre8WA3a9GWt_cgDhpkO"

# Root URL that inoreader_req() prepends to every API path it requests.

BASE_URL="https://www.inoreader.com/reader/"
42
43 # === You shouldn't have to change anything past this line. ===
44
45 from canto_next.plugins import check_program
46
47 check_program("canto-daemon")
48
49 from canto_next.fetch import DaemonFetchThreadPlugin
50 from canto_next.feed import DaemonFeedPlugin, allfeeds
51 from canto_next.hooks import call_hook, on_hook
52 from canto_next.config import config
53
54 from urllib.parse import urlencode, quote
55 import traceback
56 import requests
57 import logging
58 import time
59 import json
60
61 log = logging.getLogger("SYNC-INOREADER")
62
class InoreaderReqFailed(Exception):
    """Raised when a request to Inoreader could not be completed."""
65
class InoreaderAuthFailed(Exception):
    """Raised when Inoreader rejects the login or returns no Auth token."""
68
class CantoInoreaderAPI():
    """Small client for the Inoreader API plus a queue of pending tag edits.

    Tag additions / removals are collected with add_tag() / remove_tag() and
    only sent to Inoreader when flush_changes() is called. Once `dead` is set
    (bad credentials or rate limit reached) inoreader_req() stops issuing
    requests and raises InoreaderReqFailed instead.
    """

    def __init__(self):
        # Sent along with every request made by this object.
        self.extra_headers = {
                "User-Agent" : "Canto/0.9.0 + http://codezen.org/canto-ng",
                "AppKey" : APP_KEY,
                "AppID" : APP_ID,
        }

        # Best effort login. On failure the token stays None and
        # inoreader_req() attempts to authenticate again on first use.
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate.
        try:
            self.authorization = self.auth()
        except Exception as e:
            log.debug("Initial Inoreader auth failed: %s", e)
            self.authorization = None

        # When True, no further requests are attempted.
        self.dead = False

        # natural tag name -> list of Inoreader item ids awaiting the edit
        self.add_tags_queued = {}
        self.del_tags_queued = {}

    def auth(self):
        """Log in with EMAIL / PASSWORD and return the authorization token.

        Raises InoreaderReqFailed if the request itself fails and
        InoreaderAuthFailed if the response is not a 200 or has no 'Auth='
        line.
        """
        params = self.extra_headers.copy()
        params['Email'] = EMAIL
        params['Passwd'] = PASSWORD

        # NOTE(review): the second positional argument of requests.get() is
        # `params`, so these values (including the credentials) travel in the
        # query string rather than as HTTP headers. This is spelled out
        # explicitly here but otherwise left unchanged -- confirm that this
        # is what the ClientLogin endpoint expects.
        try:
            r = requests.get("https://www.inoreader.com/accounts/ClientLogin", params=params, timeout=1)
        except Exception as e:
            raise InoreaderReqFailed(str(e))

        if r.status_code != 200:
            raise InoreaderAuthFailed("Failed to authorize: [%s] %s" % (r.status_code, r.text))

        for line in r.text.splitlines():
            if line.startswith("Auth="):
                log.debug("authorization: %s", line[5:])
                return line[5:]

        raise InoreaderAuthFailed("Failed to find Auth= in auth response")

    def inoreader_req(self, path, query = None):
        """GET BASE_URL + path and return the response on HTTP 200.

        path may already carry a query string; query is an optional dict of
        extra parameters. Up to three attempts are made. A 401 drops the
        current token so the next attempt logs in again, a 429 marks the API
        dead, and a 503 is logged.

        Raises InoreaderAuthFailed if a login is rejected (the API is marked
        dead) and InoreaderReqFailed if the request could not be performed or
        no attempt produced a 200.
        """
        if query is None:
            query = {}

        tries = 3

        while tries and not self.dead:
            tries -= 1
            if not self.authorization:
                try:
                    self.authorization = self.auth()
                except InoreaderReqFailed as e:
                    log.debug("Auth request failed: %s", e)
                    continue
                except InoreaderAuthFailed:
                    log.error("Inoreader authorization failed, please check your credentials in sync-inoreader.py")
                    self.dead = True
                    raise

            headers = self.extra_headers.copy()
            headers["Authorization"] = "GoogleLogin auth=" + self.authorization

            # Convert every transport level failure (timeouts, connection
            # errors, ...) into our own exception, like auth() does, so
            # callers only have to handle the Inoreader* exceptions.
            try:
                r = requests.get(BASE_URL + path, params=query, headers=headers, timeout=1)
            except requests.exceptions.RequestException as e:
                raise InoreaderReqFailed(str(e)) from e

            if r.status_code == 200:
                return r

            log.debug("STATUS %s", r.status_code)
            log.debug(r.headers)
            log.debug(r.text)

            # No authorization, attempt to get another code on the next try.

            if r.status_code == 401:
                self.authorization = None
            elif r.status_code == 429:
                log.error("Inoreader rate limit reached.")
                self.dead = True
            elif r.status_code == 503:
                log.error("Inoreader appears down, state may be lost")

        raise InoreaderReqFailed("Request for %s failed" % path)

    # Convert special tags into /state/com.google/tag and others into
    # /label/tag, useful when matching without knowing the user.

    def full_ino_tag_suffix(self, tag):
        if tag in ["read", "starred", "fresh"]:
            return "/state/com.google/" + tag
        return "/label/" + tag

    # Add the user/- prefix to go upstream to Inoreader.

    def full_ino_tag(self, tag):
        return "user/-" + self.full_ino_tag_suffix(tag)

    # Do the opposite, convert an Inoreader tag into a natural name.  (i.e.)
    # user/whatever/state/com.google/read -> read
    # user/whatever/label/news -> news
    #
    # The tag must have at least four '/' separated parts (no leading slash),
    # otherwise this raises IndexError.

    def strip_ino_tag(self, tag):
        parts = tag.split("/", 3)
        if parts[2] == "state":
            return parts[3].split("/", 1)[1]
        return parts[3]

    # Return whether Inoreader data includes this natural tag

    def has_tag(self, item, tag):
        if "canto_inoreader_categories" not in item:
            return False

        suffix = self.full_ino_tag_suffix(tag)
        return any(category.endswith(suffix)
                for category in item["canto_inoreader_categories"])

    def add_tag(self, item, tag):
        """Queue adding `tag` to `item` unless Inoreader already has it."""
        ino_id = item["canto_inoreader_id"]
        if not self.has_tag(item, tag):
            self.add_tags_queued.setdefault(tag, []).append(ino_id)

    def remove_tag(self, item, tag):
        """Queue removing `tag` from `item` if Inoreader currently has it."""
        ino_id = item["canto_inoreader_id"]
        if self.has_tag(item, tag):
            self.del_tags_queued.setdefault(tag, []).append(ino_id)

    def _urllimit(self, prefix, ino_ids):
        """Send one edit request holding as many of ino_ids as fit.

        ino_ids are pre-formatted '&i=...' fragments appended to prefix until
        the path would exceed 2048 characters. Returns the fragments that
        were not sent, or [] when everything went out.
        """
        parts = [ prefix ]
        length = len(prefix)

        for i, ino_id in enumerate(ino_ids):
            # Always send at least one id per request. Without the `i`
            # check an id that doesn't fit on its own would be returned
            # unsent forever and flush_changes() would never terminate.
            if i and length + len(ino_id) > 2048:
                self.inoreader_req("".join(parts))
                return ino_ids[i:]
            parts.append(ino_id)
            length += len(ino_id)

        self.inoreader_req("".join(parts))
        return []

    def flush_changes(self):
        """Send all queued tag edits to Inoreader and empty the queues.

        If a request raises, the queues are left untouched so the edits are
        attempted again on the next flush.
        """
        for key, ino_ids in self.add_tags_queued.items():
            to_add = [ "&i=" + quote(x) for x in ino_ids ]
            while to_add:
                to_add = self._urllimit("api/0/edit-tag?a=" + quote(self.full_ino_tag(key)), to_add)

        for key, ino_ids in self.del_tags_queued.items():
            to_del = [ "&i=" + quote(x) for x in ino_ids ]
            while to_del:
                to_del = self._urllimit("api/0/edit-tag?r=" + quote(self.full_ino_tag(key)), to_del)

        self.add_tags_queued = {}
        self.del_tags_queued = {}

    def get_subs(self):
        """Return the "subscriptions" list from Inoreader's subscription list."""
        return self.inoreader_req("api/0/subscription/list").json()["subscriptions"]

    def add_sub(self, feed_url, title):
        """Subscribe to feed_url on Inoreader under the given title."""
        query = {
            "ac" : "subscribe",
            "s" : "feed/" + feed_url,
            "t" : title
        }

        self.inoreader_req("api/0/subscription/edit", query)

    def del_sub(self, feed_url):
        """Unsubscribe from feed_url on Inoreader."""
        query = {
            "ac" : "unsubscribe",
            "s" : "feed/" + feed_url
        }

        self.inoreader_req("api/0/subscription/edit", query)
248
# Module-wide API client used by the hooks and the feed plugin below.
# Constructing it attempts an initial login (see CantoInoreaderAPI.__init__).
api = CantoInoreaderAPI()
250
# Given a change set, and the current attributes of a canto item, tell
# Inoreader about it.

def sync_state_to(changes, attrs, add_only = False):
    """Queue the Inoreader tag edits implied by a canto change set.

    changes -- dict that may hold "canto-state" and / or "canto-tags" with
               the item's new values.
    attrs   -- the item's attributes; must include "canto_inoreader_id" and
               should include "canto_inoreader_categories".
    add_only -- when True, only queue additions, never removals.

    Nothing is sent here; edits are queued on `api` and go out on the next
    api.flush_changes().
    """
    if "canto-state" in changes:
        if "read" in changes["canto-state"]:
            api.add_tag(attrs, "read")
        elif not add_only:
            # remove_tag() only queues the removal if Inoreader has the tag.
            api.remove_tag(attrs, "read")

    if "canto-tags" not in changes:
        return

    new_tags = changes["canto-tags"]

    for tag in new_tags:
        # Strip user: or category: prefix. [-1] rather than [1] so a tag
        # without any prefix is used as is instead of raising IndexError.
        # add_tag() skips tags Inoreader already has.
        api.add_tag(attrs, tag.split(":", 1)[-1])

    if add_only:
        return

    # Remove tags Inoreader has that canto no longer does. Only categories
    # that edit_inoreader_sync turns into user tags are considered (labels
    # and the starred state); other state categories such as read are not
    # tags and are handled through canto-state above.

    if "canto_inoreader_categories" in attrs:
        categories = attrs["canto_inoreader_categories"]
    else:
        categories = []

    for category in categories:
        parts = category.split("/", 3)
        if len(parts) < 4:
            continue

        if parts[2] == "label":
            name = parts[3]
        elif parts[2] == "state" and parts[3] == "com.google/starred":
            name = "starred"
        else:
            continue

        if "user:" + name not in new_tags:
            api.remove_tag(attrs, name)
274                 inoreader_remove_tag(attrs["canto_inoreader_id"], tag)
275
class CantoFeedInoReader(DaemonFeedPlugin):
    """Feed plugin that merges Inoreader items and state into a canto feed.

    additems_inoreader fetches the feed's items from Inoreader and inserts
    the ones canto doesn't have; edit_inoreader_sync then pairs canto items
    with their Inoreader data and reconciles read state and tags.
    """

    def __init__(self, feed):
        self.plugin_attrs = { "edit_inoreader_sync" : self.edit_inoreader_sync,
                "additems_inoreader" : self.additems_inoreader }
        self.feed = feed

        # Items fetched by additems_inoreader, consumed (and dropped) by
        # edit_inoreader_sync.
        self.ino_data = None

    def _list_add(self, item, attr, new):
        """Append new to the list item[attr], creating it, without dupes."""
        if attr not in item:
            item[attr] = [ new ]
        elif new not in item[attr]:
            item[attr].append(new)

    def add_utag(self, item, tags_to_add, tag):
        """Give item the user tag `tag` and record it in tags_to_add."""
        self._list_add(item, "canto-tags", "user:" + tag)
        tags_to_add.append((item, "user:" + tag))

    def add_state(self, item, state):
        """Add state to the item's canto-state list."""
        self._list_add(item, "canto-state", state)

    def additems_inoreader(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items):
        """Insert items Inoreader knows about but the feed no longer lists.

        Returns the (possibly rebuilt) (tags_to_add, tags_to_remove,
        remove_items) tuple. If Inoreader can't be reached, the inputs are
        returned unchanged.
        """
        # safe="" so that the '/' characters in the URL are quoted as well.
        stream_id = quote("feed/" + feed.URL, safe="")

        query = { "n" : 1000 }

        # Collect all of the items

        self.ino_data = []

        content_path = "api/0/stream/contents/" + stream_id

        try:
            r = api.inoreader_req(content_path, query).json()
            self.ino_data.extend(r["items"])
        except (InoreaderAuthFailed, InoreaderReqFailed):
            return (tags_to_add, tags_to_remove, remove_items)
        except Exception:
            log.debug("EXCEPT: %s", traceback.format_exc())
            raise

        # Find items that were inserted last time, and remove them, potentially
        # adding them to our fresh Inoreader data.

        # This keeps us from getting dupes when Inoreader finds an item, we
        # insert it, and then a real copy comes to canto but canto doesn't
        # detect the dupe since the ids are different.

        for canto_entry in newcontent["entries"][:]:
            if "canto-from-inoreader" not in canto_entry:
                continue

            remove_items.append(canto_entry)
            tags_to_add = [ x for x in tags_to_add if x[0] != canto_entry]

            newcontent["entries"].remove(canto_entry)

            # Keep the old copy only if Inoreader didn't just return it.
            if not any(canto_entry["id"] == ino_entry["id"]
                    for ino_entry in self.ino_data):
                self.ino_data.append(canto_entry)

        # Now insert (or re-insert) items that aren't already in our data.

        # NOTE: It's okay if re-inserted items are also in remove_ids, since
        # that's processed first, and will be cancelled out by adding the tags
        # afterwards.

        for ino_entry in self.ino_data:
            for canto_entry in newcontent["entries"]:
                if ino_entry["canonical"][0]["href"] != canto_entry["link"]:
                    continue
                if ino_entry["id"] == canto_entry["id"]:
                    canto_entry["canto-from-inoreader"] = True
                break
            else:
                if "canto-from-inoreader" not in ino_entry:
                    # feedparser compatibility
                    ino_entry["summary"] = ino_entry["summary"]["content"]
                    ino_entry["link"] = ino_entry["canonical"][0]["href"]

                    # mark this item as from inoreader (missing from feed)
                    ino_entry["canto-from-inoreader"] = True

                newcontent["entries"].append(ino_entry)
                tags_to_add.append((ino_entry, "maintag:" + feed.name ))

        return (tags_to_add, tags_to_remove, remove_items)

    def edit_inoreader_sync(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items):
        """Pair canto items with Inoreader data and reconcile state / tags.

        Returns (tags_to_add, tags_to_remove, remove_items). Queued changes
        are flushed to Inoreader at the end; a failed flush is logged and
        does not discard the result.
        """

        # Add inoreader_id/categories information to the items

        # This is very similar to the loop in additems_inoreader, but needs to
        # be separate in case other plugins add items that inoreader might
        # track.

        # ino_data is None if additems_inoreader hasn't run for this update;
        # treat that as "no Inoreader data" rather than raising TypeError.

        for ino_entry in self.ino_data or []:
            for canto_entry in newcontent["entries"]:
                if ino_entry["canonical"][0]["href"] != canto_entry["link"]:
                    continue
                canto_entry["canto_inoreader_id"] = ino_entry["id"]
                canto_entry["canto_inoreader_categories"] = ino_entry["categories"]
                break

        # Drop the data.
        self.ino_data = None

        for entry in newcontent["entries"]:
            # If we didn't get an id for this item, skip it

            if "canto_inoreader_id" not in entry:
                continue

            for category in entry["canto_inoreader_categories"]:
                if category.endswith("/state/com.google/read"):
                    self.add_state(entry, "read")
                    continue

                cat = category.split("/", 3)
                if len(cat) < 4:
                    log.debug("Weird category? %s", cat)
                    continue

                if cat[2] == "state":
                    if cat[3] == "com.google/starred":
                        self.add_utag(entry, tags_to_add, "starred")
                elif cat[2] == "label":
                    self.add_utag(entry, tags_to_add, cat[3])

            # If this is the first time we've paired an item up with its
            # Inoreader data, our state is better, so sync it to Inoreader, and
            # then skip the remainder of the logic to remove canto state/tags

            if "canto-inoreader-sync" not in entry:
                sync_state_to(entry, entry, True)
                entry["canto-inoreader-sync"] = True
                continue

            if "canto-state" not in entry or not isinstance(entry["canto-state"], list):
                continue

            # It appears that if an item is "fresh" it will resist all attempts
            # to set it as read?

            if "read" in entry["canto-state"] and not\
                    (api.has_tag(entry, "read") or api.has_tag(entry, "fresh")):
                log.debug("Marking unread from Inoreader")
                entry["canto-state"].remove("read")

            if "canto-tags" not in entry or not isinstance(entry["canto-tags"], list):
                continue

            for tag in entry["canto-tags"][:]:
                # [-1] so a tag without a prefix doesn't raise IndexError.
                if not api.has_tag(entry, tag.split(":", 1)[-1]):
                    entry["canto-tags"].remove(tag)
                    tags_to_remove.append((entry, tag))

        # Don't let an unreachable Inoreader throw away the work above. The
        # same exceptions are tolerated in additems_inoreader.
        try:
            api.flush_changes()
        except (InoreaderAuthFailed, InoreaderReqFailed) as e:
            log.debug("Failed to flush changes to Inoreader: %s", e)

        return (tags_to_add, tags_to_remove, remove_items)
436
# For canto communicating to Inoreader, we tap into the relevant hooks to
# pick up state / tag changes, and convert that into Inoreader API calls.

def post_setattributes(socket, args):
    """Hook: queue Inoreader edits for every changed item, then flush them.

    args maps item ids (JSON strings containing a "URL" key) to the
    attributes that were changed on that item.
    """
    for item_id, changes in args.items():
        feed_url = json.loads(item_id)["URL"]
        feed = allfeeds.get_feed(feed_url)

        wanted = { item_id :\
                ["canto_inoreader_id", "canto_inoreader_categories", "canto-state", "canto-tags"] }
        attrs = feed.get_attributes([item_id], wanted)[item_id]

        # Only synchronize items with a proper Inoreader id. It is likely
        # empty otherwise, since get_attributes will sub in "".

        if attrs["canto_inoreader_id"].startswith("tag:google.com,2005:reader/item/"):
            sync_state_to(changes, attrs)

    api.flush_changes()

on_hook("daemon_post_setattributes", post_setattributes)
462
def post_setconfigs(socket, args):
    """Hook: subscribe on Inoreader to every feed in the new config."""
    if "feeds" not in args:
        return

    for new_feed in args["feeds"]:
        api.add_sub(new_feed["url"], new_feed["name"])

on_hook("daemon_post_setconfigs", post_setconfigs)
469
def post_delconfigs(socket, args):
    """Hook: unsubscribe on Inoreader from every feed that was deleted."""
    if "feeds" not in args:
        return

    for old_feed in args["feeds"]:
        api.del_sub(old_feed["url"])

on_hook("daemon_post_delconfigs", post_delconfigs)
476
# Do the initial feed synchronization. This only occurs once per run, and
# assumes Inoreader knows everything.

def on_daemon_serving():
    """Hook: make canto's configured feeds match Inoreader's subscriptions.

    Feeds configured in canto but unknown to Inoreader are deleted, and
    Inoreader subscriptions missing from canto are added. If the
    subscription list can't be fetched, nothing is changed.
    """
    log.debug("Synchronizing subscriptions.")

    # Without Inoreader's list we can't tell which feeds are stale, so leave
    # the config alone instead of letting the exception escape the hook.
    try:
        ino_subs = api.get_subs()
    except (InoreaderAuthFailed, InoreaderReqFailed) as e:
        log.error("Unable to fetch Inoreader subscriptions, skipping sync: %s", e)
        return

    ino_urls = { sub["url"] for sub in ino_subs }

    # Iterate over a snapshot: the hook may remove feeds from the config
    # while we're walking it. NOTE(review): presumably daemon_del_configs
    # edits config.json["feeds"] -- the copy is harmless if it doesn't.

    for c_feed in list(config.json["feeds"]):
        url = c_feed["url"]

        if url not in ino_urls:
            log.debug("Old feed: %s", url)
            call_hook("daemon_del_configs", [ None, { "feeds" : [ c_feed ] } ] )

    for sub in ino_subs:
        url = sub["url"]
        name = sub["title"]

        # Re-read the config for every subscription so feeds added by an
        # earlier iteration are seen.

        for c_feed in config.json["feeds"]:
            if c_feed["url"] == url:
                break
            if c_feed["name"] == name:
                log.info("Found feed with same name, but not URL? Skipping.")
                break
        else:
            log.debug("New feed: %s", url)
            call_hook("daemon_set_configs", [ None, { "feeds" : [ { "name" : name, "url" : url } ] } ])


on_hook("daemon_serving", on_daemon_serving)