Skip to content

Commit

Permalink
refactor: move calc_next_check_as_per_server_side_cache into web.WebFeed
Browse files Browse the repository at this point in the history
Signed-off-by: Rongrong <[email protected]>
  • Loading branch information
Rongronggg9 committed Nov 4, 2024
1 parent 5734530 commit 137281a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 30 deletions.
31 changes: 1 addition & 30 deletions src/monitor/_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ async def _do_monitor_a_feed(self, feed: db.Feed, now: datetime):
feed.etag = etag
feed_updated_fields.add('etag')

new_next_check_time = _defer_next_check_as_per_server_side_cache(wf)
new_next_check_time = wf.calc_next_check_as_per_server_side_cache()

if not rss_d.entries: # empty
logger.debug(f'Fetched (not updated, empty): {feed.link}')
Expand Down Expand Up @@ -362,32 +362,3 @@ async def _do_monitor_a_feed(self, feed: db.Feed, now: datetime):
await Notifier(feed=feed, subs=subs, entries=updated_entries).notify_all()
stat.updated()
return


def _defer_next_check_as_per_server_side_cache(wf: web.WebFeed) -> Optional[datetime]:
wr = wf.web_response
assert wr is not None
expires = wr.expires
now = wr.now

# defer next check as per Cloudflare cache
# https://developers.cloudflare.com/cache/concepts/cache-responses/
# https://developers.cloudflare.com/cache/how-to/edge-browser-cache-ttl/
if expires and wf.headers.get('cf-cache-status') in {'HIT', 'MISS', 'EXPIRED', 'REVALIDATED'} and expires > now:
return expires

# defer next check as per RSSHub TTL (or Cache-Control max-age)
# only apply when TTL > 5min,
# as it is the default value of RSSHub and disabling cache won't change it in some legacy versions
rss_d = wf.rss_d
if rss_d.feed.get('generator') == 'RSSHub' and (updated_str := rss_d.feed.get('updated')):
ttl_in_minute_str: str = rss_d.feed.get('ttl', '')
ttl_in_second = int(ttl_in_minute_str) * 60 if ttl_in_minute_str.isdecimal() else None
if ttl_in_second is None:
ttl_in_second = wr.max_age
if ttl_in_second and ttl_in_second > 300:
updated = web.utils.rfc_2822_8601_to_datetime(updated_str)
if updated and (next_check_time := updated + timedelta(seconds=ttl_in_second)) > now:
return next_check_time

return None
38 changes: 38 additions & 0 deletions src/web/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,44 @@ class WebFeed:

web_response: Optional[WebResponse] = None

def calc_next_check_as_per_server_side_cache(self) -> Optional[datetime]:
    """
    Calculate when the next check may be deferred to, based on server-side cache hints.

    Two hints are consulted, in order:

    1. Cloudflare cache: if ``cf-cache-status`` marks the response as served through the cache
       and the response's expiry time is still in the future, that expiry time is returned.
    2. RSSHub TTL: if the feed was generated by RSSHub and carries an ``updated`` value, the
       feed's ``ttl`` (minutes) or, failing that, the response's max-age (seconds) is added to
       the parsed ``updated`` time.

    :return: the time until which the server-side copy is expected to stay unchanged, or
        ``None`` when there is no web response or no hint applies
    """
    wr = self.web_response
    if wr is None:
        return None
    now = wr.now
    expires = wr.expires

    # defer next check as per Cloudflare cache
    # https://developers.cloudflare.com/cache/concepts/cache-responses/
    # https://developers.cloudflare.com/cache/how-to/edge-browser-cache-ttl/
    if (
        # a response may carry no expiry at all; comparing None with a datetime raises TypeError
        expires
        and self.headers.get('cf-cache-status') in {'HIT', 'MISS', 'EXPIRED', 'REVALIDATED'}
        and expires > now
    ):
        return expires

    # defer next check as per RSSHub TTL (or Cache-Control max-age)
    # only apply when TTL > 5min,
    # as it is the default value of RSSHub and disabling cache won't change it in some legacy versions
    rss_d = self.rss_d
    if (
        rss_d.feed.get('generator') == 'RSSHub'
        and (updated_str := rss_d.feed.get('updated'))
    ):
        ttl_in_minute_str: str = rss_d.feed.get('ttl', '')
        ttl_in_second = (
            int(ttl_in_minute_str) * 60
            if ttl_in_minute_str.isdecimal()
            else wr.max_age  # may be missing (falsy), in which case no TTL applies
        )
        if ttl_in_second and ttl_in_second > 300:
            updated = rfc_2822_8601_to_datetime(updated_str)
            if updated and (next_check_time := updated + timedelta(seconds=ttl_in_second)) > now:
                return next_check_time

    return None


def proxy_filter(url: str, parse: bool = True) -> bool:
if not (env.PROXY_BYPASS_PRIVATE or env.PROXY_BYPASS_DOMAINS):
Expand Down

0 comments on commit 137281a

Please sign in to comment.