Source code for brunns.matchers.rss

import logging
from datetime import datetime
from email.utils import parsedate_to_datetime
from typing import Optional, Union

import feedparser
import httpx
from furl import furl
from hamcrest import anything
from hamcrest.core.base_matcher import BaseMatcher, T
from hamcrest.core.description import Description
from hamcrest.core.helpers.wrap_matcher import wrap_matcher
from hamcrest.core.matcher import Matcher
from yarl import URL

from brunns.matchers.utils import append_matcher_description, describe_field_match, describe_field_mismatch

logger = logging.getLogger(__name__)
ANYTHING = anything()


class RssFeedMatcher(BaseMatcher[str]):
    """Matcher that parses its subject with ``feedparser`` and checks feed-level fields.

    The subject is converted with ``str()`` and handed to ``feedparser.parse``.
    Every field matcher defaults to ``ANYTHING``; use the ``with_*`` / ``and_*``
    builder methods to constrain the title, link, description, published date
    and entries of the parsed feed.
    """

    def __init__(self):
        # Each field starts unconstrained; the builder methods below replace these.
        self.title: Matcher[str] = ANYTHING
        self.link: Matcher[URL] = ANYTHING
        self.description: Matcher[str] = ANYTHING
        self.published: Matcher[Union[datetime, None]] = ANYTHING
        self.entries: Matcher[list[feedparser.FeedParserDict]] = ANYTHING

    def _matches(self, item: Union[str, URL, furl]) -> bool:
        """Parse ``item`` and check every configured field matcher.

        :param item: Value passed (as ``str(item)``) to ``feedparser.parse``.
        :return: ``False`` if parsing raises ``ValueError`` or ``httpx.HTTPError``,
            or if the parsed ``feed`` is empty; otherwise whether all field
            matchers accept the parsed values.
        :raises ValueError: If the feed's ``published`` string cannot be parsed
            as a date.
        """
        try:
            actual = feedparser.parse(str(item))
        except (ValueError, httpx.HTTPError):
            return False

        if not actual.feed:
            return False

        # Parsed before the field checks, exactly as before, so that an
        # unparseable date is reported regardless of the other fields.
        published = self._get_published_date(actual.feed)
        return (
            self.title.matches(actual.feed.get("title", ""))
            and self.link.matches(URL(actual.feed.get("link", "")))
            and self.description.matches(actual.feed.get("description", ""))
            and self.published.matches(published)
            and self.entries.matches(actual.entries)
        )

    def describe_to(self, description: Description) -> None:
        """Append a description of the expected feed to ``description``."""
        description.append_text("RSS feed with")
        append_matcher_description(self.title, "title", description)
        append_matcher_description(self.link, "link", description)
        append_matcher_description(self.description, "description", description)
        append_matcher_description(self.published, "published", description)
        append_matcher_description(self.entries, "entries", description)

    def describe_mismatch(self, item: Union[str, URL, furl], mismatch_description: Description) -> None:
        """Append an explanation of why ``item`` did not match.

        The subject is parsed again here. Parse failures and empty feeds get a
        dedicated message; otherwise each field is passed to
        ``describe_field_mismatch`` together with its matcher.

        :param item: The value that was matched against.
        :param mismatch_description: Description to append the explanation to.
        """
        try:
            actual = feedparser.parse(str(item))
        except ValueError as e:
            mismatch_description.append_text(f"RSS parsing failed with '{e}'\nfor value {item}")
        except httpx.HTTPError as e:
            mismatch_description.append_text(f"HTTP error '{e}'\nfor URL {item}")
        else:
            if not actual.feed:
                mismatch_description.append_text(f"RSS feed was empty/invalid for value {item}")
                return
            mismatch_description.append_text("was RSS feed with")
            describe_field_mismatch(self.title, "title", actual.feed.get("title", ""), mismatch_description)
            describe_field_mismatch(self.link, "link", URL(actual.feed.get("link", "")), mismatch_description)
            describe_field_mismatch(
                self.description, "description", actual.feed.get("description", ""), mismatch_description
            )
            published = self._get_published_date(actual.feed)
            describe_field_mismatch(self.published, "published", published, mismatch_description)
            describe_field_mismatch(self.entries, "entries", actual.entries, mismatch_description)

    def describe_match(self, item: T, match_description: Description) -> None:
        """Append a description of the matched feed's fields to ``match_description``.

        The subject is parsed again here, without the error handling used in
        ``_matches``.
        """
        actual = feedparser.parse(str(item))
        match_description.append_text("was RSS feed with")
        describe_field_match(self.title, "title", actual.feed.get("title", ""), match_description)
        describe_field_match(self.link, "link", URL(actual.feed.get("link", "")), match_description)
        describe_field_match(self.description, "description", actual.feed.get("description", ""), match_description)
        published = self._get_published_date(actual.feed)
        describe_field_match(self.published, "published", published, match_description)
        describe_field_match(self.entries, "entries", actual.entries, match_description)

    def _get_published_date(self, feed) -> Optional[datetime]:
        """Return the feed's ``published`` value as a ``datetime``, or ``None`` if absent.

        The strict ``"%a, %d %b %Y %H:%M:%S %z"`` format is tried first, so
        values it already handled produce the same result as before. ``%z``
        only accepts numeric offsets (and ``Z``), so dates using RFC 822 zone
        names such as ``GMT`` used to raise ``ValueError``; those now fall back
        to ``email.utils.parsedate_to_datetime``.

        :param feed: Parsed feed mapping, checked for a ``published`` key.
        :raises ValueError: If neither parser accepts the string.
        """
        if "published" not in feed:
            return None

        raw = feed.published
        try:
            return datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S %z")
        except ValueError:
            try:
                return parsedate_to_datetime(raw)
            except (TypeError, ValueError):
                # Older Pythons raise TypeError for unparseable input; either
                # way, fall through and surface the original strptime error.
                pass
            raise

    def with_title(self, title: Union[str, Matcher[str]]) -> "RssFeedMatcher":
        """Require the feed title to match ``title``; returns ``self`` for chaining."""
        self.title = wrap_matcher(title)
        return self

    def and_title(self, title: Union[str, Matcher[str]]) -> "RssFeedMatcher":
        """Alias of :meth:`with_title`."""
        return self.with_title(title)

    def with_link(self, link: Union[URL, Matcher[URL]]) -> "RssFeedMatcher":
        """Require the feed link (as a ``yarl.URL``) to match ``link``; returns ``self``."""
        self.link = wrap_matcher(link)
        return self

    def and_link(self, link: Union[URL, Matcher[URL]]) -> "RssFeedMatcher":
        """Alias of :meth:`with_link`."""
        return self.with_link(link)

    def with_description(self, description: Union[str, Matcher[str]]) -> "RssFeedMatcher":
        """Require the feed description to match ``description``; returns ``self``."""
        self.description = wrap_matcher(description)
        return self

    def and_description(self, description: Union[str, Matcher[str]]) -> "RssFeedMatcher":
        """Alias of :meth:`with_description`."""
        return self.with_description(description)

    def with_published(self, published: Union[datetime, None, Matcher[Union[datetime, None]]]) -> "RssFeedMatcher":
        """Require the parsed published date to match ``published``; returns ``self``."""
        self.published = wrap_matcher(published)
        return self

    def and_published(self, published: Union[datetime, None, Matcher[Union[datetime, None]]]) -> "RssFeedMatcher":
        """Alias of :meth:`with_published`."""
        return self.with_published(published)

    def with_entries(
        self, entries: Union[list[feedparser.FeedParserDict], Matcher[list[feedparser.FeedParserDict]]]
    ) -> "RssFeedMatcher":
        """Require the parsed ``entries`` list to match ``entries``; returns ``self``."""
        self.entries = wrap_matcher(entries)
        return self

    def and_entries(
        self, entries: Union[list[feedparser.FeedParserDict], Matcher[list[feedparser.FeedParserDict]]]
    ) -> "RssFeedMatcher":
        """Alias of :meth:`with_entries`."""
        return self.with_entries(entries)
class RssFeedEntryMatcher(BaseMatcher[feedparser.FeedParserDict]):
    """Matcher for a single, already-parsed RSS feed entry.

    Operates on a ``feedparser.FeedParserDict``. Every field matcher defaults to
    ``ANYTHING``; use the ``with_*`` / ``and_*`` builder methods to constrain the
    entry's title, link, description and published date.
    """

    def __init__(self):
        # Each field starts unconstrained; the builder methods below replace these.
        self.title: Matcher[str] = ANYTHING
        self.link: Matcher[URL] = ANYTHING
        self.description: Matcher[str] = ANYTHING
        self.published: Matcher[Union[datetime, None]] = ANYTHING

    def _matches(self, item: feedparser.FeedParserDict) -> bool:
        """Check every configured field matcher against ``item``.

        Missing ``title``, ``link`` and ``description`` keys are treated as empty
        strings; a missing ``published`` key is treated as ``None``.

        :raises ValueError: If the entry's ``published`` string cannot be parsed
            as a date.
        """
        # Parsed before the field checks, exactly as before, so that an
        # unparseable date is reported regardless of the other fields.
        published = self._get_published_date(item)
        return (
            self.title.matches(item.get("title", ""))
            and self.link.matches(URL(item.get("link", "")))
            and self.description.matches(item.get("description", ""))
            and self.published.matches(published)
        )

    def describe_to(self, description: Description) -> None:
        """Append a description of the expected entry to ``description``."""
        description.append_text("RSS feed entry with")
        append_matcher_description(self.title, "title", description)
        append_matcher_description(self.link, "link", description)
        append_matcher_description(self.description, "description", description)
        append_matcher_description(self.published, "published", description)

    def describe_match(self, item: feedparser.FeedParserDict, match_description: Description) -> None:
        """Append a description of the matched entry's fields to ``match_description``."""
        match_description.append_text("was RSS feed entry with")
        describe_field_match(self.title, "title", item.get("title", ""), match_description)
        describe_field_match(self.link, "link", URL(item.get("link", "")), match_description)
        describe_field_match(self.description, "description", item.get("description", ""), match_description)
        published = self._get_published_date(item)
        describe_field_match(self.published, "published", published, match_description)

    def describe_mismatch(self, item: feedparser.FeedParserDict, mismatch_description: Description) -> None:
        """Append an explanation of why ``item`` did not match to ``mismatch_description``."""
        mismatch_description.append_text("was RSS feed entry with")
        describe_field_mismatch(self.title, "title", item.get("title", ""), mismatch_description)
        describe_field_mismatch(self.link, "link", URL(item.get("link", "")), mismatch_description)
        describe_field_mismatch(self.description, "description", item.get("description", ""), mismatch_description)
        published = self._get_published_date(item)
        describe_field_mismatch(self.published, "published", published, mismatch_description)

    def _get_published_date(self, entry: feedparser.FeedParserDict) -> Optional[datetime]:
        """Return the entry's ``published`` value as a ``datetime``, or ``None`` if absent.

        The strict ``"%a, %d %b %Y %H:%M:%S %z"`` format is tried first, so
        values it already handled produce the same result as before. ``%z``
        only accepts numeric offsets (and ``Z``), so dates using RFC 822 zone
        names such as ``GMT`` used to raise ``ValueError``; those now fall back
        to ``email.utils.parsedate_to_datetime``.

        :param entry: Parsed entry mapping, checked for a ``published`` key.
        :raises ValueError: If neither parser accepts the string.
        """
        if "published" not in entry:
            return None

        raw = entry["published"]
        try:
            return datetime.strptime(raw, "%a, %d %b %Y %H:%M:%S %z")
        except ValueError:
            try:
                return parsedate_to_datetime(raw)
            except (TypeError, ValueError):
                # Older Pythons raise TypeError for unparseable input; either
                # way, fall through and surface the original strptime error.
                pass
            raise

    def with_title(self, title: Union[str, Matcher[str]]) -> "RssFeedEntryMatcher":
        """Require the entry title to match ``title``; returns ``self`` for chaining."""
        self.title = wrap_matcher(title)
        return self

    def and_title(self, title: Union[str, Matcher[str]]) -> "RssFeedEntryMatcher":
        """Alias of :meth:`with_title`."""
        return self.with_title(title)

    def with_link(self, link: Union[URL, Matcher[URL]]) -> "RssFeedEntryMatcher":
        """Require the entry link (as a ``yarl.URL``) to match ``link``; returns ``self``."""
        self.link = wrap_matcher(link)
        return self

    def and_link(self, link: Union[URL, Matcher[URL]]) -> "RssFeedEntryMatcher":
        """Alias of :meth:`with_link`."""
        return self.with_link(link)

    def with_description(self, description: Union[str, Matcher[str]]) -> "RssFeedEntryMatcher":
        """Require the entry description to match ``description``; returns ``self``."""
        self.description = wrap_matcher(description)
        return self

    def and_description(self, description: Union[str, Matcher[str]]) -> "RssFeedEntryMatcher":
        """Alias of :meth:`with_description`."""
        return self.with_description(description)

    def with_published(
        self, published: Union[datetime, None, Matcher[Union[datetime, None]]]
    ) -> "RssFeedEntryMatcher":
        """Require the parsed published date to match ``published``; returns ``self``."""
        self.published = wrap_matcher(published)
        return self

    def and_published(
        self, published: Union[datetime, None, Matcher[Union[datetime, None]]]
    ) -> "RssFeedEntryMatcher":
        """Alias of :meth:`with_published`."""
        return self.with_published(published)
def is_rss_feed() -> RssFeedMatcher:
    """Create a matcher that treats a string (or URL-like object) as an RSS feed.

    The subject is parsed as an RSS feed with ``feedparser`` and the parsed
    structure is then checked. The returned matcher follows a builder pattern:
    chain calls such as ``.with_title(...)`` to narrow what is accepted.

    :return: A matcher for RSS feed content.
    """
    feed_matcher = RssFeedMatcher()
    return feed_matcher
def is_rss_entry() -> RssFeedEntryMatcher:
    """Create a matcher for one entry (item) of an RSS feed.

    The matcher works on ``feedparser.FeedParserDict`` objects, such as those
    typically found in the ``entries`` list of a parsed feed.

    :return: A matcher for an RSS feed entry.
    """
    entry_matcher = RssFeedEntryMatcher()
    return entry_matcher