from collections.abc import Mapping, Sequence
from typing import Any, Optional, Union, cast
from bs4 import BeautifulSoup, Tag # type: ignore[attr-defined]
from hamcrest import all_of, anything, contains_exactly, has_entry, has_item
from hamcrest.core.base_matcher import BaseMatcher
from hamcrest.core.description import Description
from hamcrest.core.helpers.wrap_matcher import wrap_matcher
from hamcrest.core.matcher import Matcher
# Shared "match anything" matcher instance. It doubles as a sentinel: it is the default value of
# the optional matcher arguments in this module, and code compares against it to detect
# "no constraint was supplied".
ANYTHING = anything()
# Type alias for a tag-attribute constraint: either a matcher over a mapping of attribute names
# to values, or a plain mapping from attribute name to an expected string / string matcher.
ATTR_MATCHER = Union[Matcher[Mapping[str, Union[str, Matcher[str]]]], Mapping[str, Union[str, Matcher[str]]]]
[docs]
class HtmlWithTag(BaseMatcher[str]):
    """Matcher for HTML containing at least one tag that satisfies a tag matcher.

    Candidate tags are collected with BeautifulSoup, optionally narrowed by tag name and by
    ``id`` attribute; the match succeeds when any candidate satisfies ``tag_matcher``.

    :param tag_matcher: Matcher applied to each candidate tag. A value that is not a matcher is
        treated as a string and converted with ``tag_has_string``.
    :param name: Tag name used to narrow the search; ``None`` places no constraint on the name.
    :param id_: ``id`` attribute used to narrow the search; ignored when ``None`` or empty.
    """

    def __init__(
        self,
        tag_matcher: Union[str, Matcher[Tag]],
        name: Optional[str] = None,
        id_: Optional[str] = None,
    ) -> None:
        if isinstance(tag_matcher, Matcher):
            resolved: Matcher[Tag] = tag_matcher
        else:
            # Anything that is not already a matcher is taken to be the expected tag string.
            resolved = tag_has_string(cast("str", tag_matcher))
        self.tag_matcher: Matcher[Tag] = resolved
        self.name = name
        self.id_ = id_

    def _matches(self, actual: str) -> bool:
        """Return True when any tag found in ``actual`` satisfies the tag matcher."""
        candidates: Sequence[Tag] = self.findall(actual)
        any_candidate_matches = cast("Matcher[Sequence[Tag]]", has_item(self.tag_matcher))
        return any_candidate_matches.matches(candidates)

    def findall(self, actual: str) -> Sequence[Tag]:
        """Return the tags in ``actual`` selected by the configured name and id.

        :param actual: HTML text to parse with ``html.parser``; a value that is already a
            ``Tag`` is searched directly instead of being parsed.
        """
        if isinstance(actual, Tag):
            soup = actual
        else:
            soup = BeautifulSoup(actual, "html.parser")
        if self.id_:
            return soup.find_all(self.name, id=self.id_)
        return soup.find_all(self.name)

    def _describe_selector(self, description: Description) -> None:
        """Append the configured name/id selectors, skipping whichever is unset."""
        for label, value in ((" name=", self.name), (" id=", self.id_)):
            if value:
                description.append_text(label).append_description_of(value)

    def describe_to(self, description: Description) -> None:
        """Describe the expectation: the selectors followed by the tag matcher."""
        description.append_text("HTML with tag")
        self._describe_selector(description)
        description.append_text(" matching ").append_description_of(self.tag_matcher)

    def describe_mismatch(self, actual, mismatch_description: Description) -> None:
        """Describe a mismatch by listing the repr of every tag the selectors found."""
        mismatch_description.append_text("got HTML with tag")
        self._describe_selector(mismatch_description)
        found_reprs = [repr(tag) for tag in self.findall(actual)]
        mismatch_description.append_list(" values [", ", ", "]", found_reprs)
[docs]
class TagWith(BaseMatcher[Tag]):
    """Matcher for a BeautifulSoup ``Tag`` by name, string content, class and attributes.

    Every criterion defaults to the module-level ``ANYTHING`` matcher, meaning "no constraint".
    Plain values are wrapped with ``wrap_matcher``; matchers are used as given.

    :param name: String or matcher for the tag name.
    :param string: String or matcher for ``tag.string``; a falsy ``tag.string`` is matched as
        the empty string.
    :param clazz: String or matcher that at least one entry of the tag's ``class`` attribute
        must satisfy.
    :param attributes: Mapping or matcher checked against the tag's attribute mapping.
    """

    def __init__(
        self,
        name: Union[str, Matcher[str]] = ANYTHING,
        string: Union[str, Matcher[str]] = ANYTHING,
        clazz: Union[str, Matcher[str]] = ANYTHING,
        attributes: Union[
            Mapping[str, Union[str, Matcher[str]]],
            Matcher[Mapping[str, Union[str, Matcher[str]]]],
        ] = ANYTHING,
    ) -> None:
        self.name: Matcher[str] = wrap_matcher(name)
        self.string: Matcher[str] = wrap_matcher(string)
        self.clazz: Matcher[str] = wrap_matcher(clazz)
        self.attributes: Matcher[Mapping[str, Union[str, Matcher[str]]]] = wrap_matcher(attributes)

    def _matches(self, tag: Tag) -> bool:
        """Return True when ``tag`` satisfies every configured criterion."""
        # TODO - remove type ignore when https://github.com/python/mypy/issues/3283 is resolved.
        return (
            self.name.matches(tag.name)
            and self.string.matches(tag.string or "")
            # The sentinel is checked by identity first: has_item() needs at least one class
            # entry to succeed, so an unconstrained class must not be routed through it.
            and (self.clazz is ANYTHING or has_item(self.clazz).matches(tag.get("class", [])))  # type: ignore[arg-type]
            and self.attributes.matches(cast("Mapping[str, Any]", tag.attrs))
        )

    def describe_to(self, description: Description) -> None:
        """Describe only the criteria that were actually constrained."""
        description.append_text("tag with")
        criteria = (
            (" name matching ", self.name),
            (" string matching ", self.string),
            (" class matching ", self.clazz),
            (" attributes matching ", self.attributes),
        )
        for label, matcher in criteria:
            # Identity, not equality: ANYTHING is a shared sentinel instance.
            if matcher is not ANYTHING:
                description.append_text(label).append_description_of(matcher)
[docs]
class HtmlHasTable(BaseMatcher[str]):
    """Matcher for HTML whose ``<table>`` element satisfies a table matcher.

    The HTML is parsed with ``html.parser`` and all ``<table>`` tags are collected. They are
    checked with ``contains_exactly`` against a single combined matcher, so the match succeeds
    only when exactly one table is found and it satisfies both the id constraint and
    ``table_matcher``.

    :param table_matcher: Matcher applied to the table ``Tag``.
    :param id_: String or matcher for the table's ``id`` attribute; the default ``ANYTHING``
        places no constraint on the id (a table without an id is accepted).
    """

    def __init__(self, table_matcher: Matcher[Tag], id_: Union[str, Matcher[str]] = ANYTHING) -> None:
        self.table_matcher = table_matcher
        self.id_: Matcher[str] = wrap_matcher(id_)

    def _id_tag_matcher(self) -> Matcher[Tag]:
        """Lift the id string matcher into a matcher over a table ``Tag``.

        ``self.id_`` matches strings, so it must be applied to the tag's ``id`` attribute and
        not to the ``Tag`` object itself (which would never equal a string).
        """
        if self.id_ is ANYTHING:
            return ANYTHING
        return TagWith(attributes=has_entry("id", self.id_))

    def _matches(self, html: str) -> bool:
        """Return True when the HTML's table satisfies the id and table matchers."""
        # TODO - remove type ignore when https://github.com/python/mypy/issues/3283 is resolved.
        tables = BeautifulSoup(html, "html.parser").find_all("table")
        table_expectation = all_of(self._id_tag_matcher(), self.table_matcher)
        return contains_exactly(table_expectation).matches(tables)  # type: ignore[arg-type]

    def describe_to(self, description: Description) -> None:
        """Describe the expected table, including the id constraint when one was given."""
        description.append_text("HTML with table")
        if self.id_ is not ANYTHING:
            description.append_text(" id matching ").append_description_of(self.id_)
        description.append_text(" matching ")
        self.table_matcher.describe_to(description)
[docs]
class TableHasRow(BaseMatcher[Tag]):
    """Matcher for a table ``Tag`` containing a row that satisfies the given criteria.

    Rows are the ``<tr>`` tags found in the table. Only rows with at least one cell of the
    selected kind (``<th>`` when ``header_row`` is true, otherwise ``<td>``) are considered,
    and the zero-based index is assigned after that filtering.

    :param row_matcher: Matcher for the ``<tr>`` tag itself.
    :param cells_matcher: Matcher for the sequence of cell tags in the row.
    :param index_matcher: Integer or matcher for the row's index among the considered rows.
    :param header_row: When true, a row's cells are its ``<th>`` tags instead of ``<td>``.
    """

    def __init__(
        self,
        row_matcher: Matcher[Tag] = ANYTHING,
        cells_matcher: Matcher[Sequence[Tag]] = ANYTHING,
        index_matcher: Union[int, Matcher[int]] = ANYTHING,
        *,
        header_row: bool = False,
    ) -> None:
        self.row_matcher = row_matcher
        self.cells_matcher = cells_matcher
        self.header_row = header_row
        self.index_matcher: Matcher[int] = wrap_matcher(index_matcher)

    def _matches(self, table: Tag) -> bool:
        """Return True when some considered row satisfies index, row and cells matchers."""
        # TODO - remove type ignore when https://github.com/python/mypy/issues/3283 is resolved.
        rows: Sequence[Tag] = table.find_all("tr")
        indexed_rows_and_cells = []
        for row in rows:
            # Look the cells up once per row; rows without cells of the selected kind are
            # skipped and do not consume an index.
            cells = self._row_cells(row)
            if cells:
                indexed_rows_and_cells.append((len(indexed_rows_and_cells), row, cells))
        indexed_row_matcher = cast(
            "Matcher[tuple[int, Tag, Sequence[Tag]]]",
            contains_exactly(self.index_matcher, self.row_matcher, self.cells_matcher),
        )
        return has_item(indexed_row_matcher).matches(indexed_rows_and_cells)  # type: ignore[arg-type]

    def _row_cells(self, row: Tag) -> Sequence[Tag]:
        """Return the row's ``<th>`` cells for header rows, otherwise its ``<td>`` cells."""
        return row.find_all("th" if self.header_row else "td")

    def describe_to(self, description: Description) -> None:
        """Describe the expected row, listing only the constrained criteria."""
        description.append_text(f"table with {'header ' if self.header_row else ''}row")
        criteria = (
            (" cells matching ", self.cells_matcher),
            (" row matching ", self.row_matcher),
            (" index matching ", self.index_matcher),
        )
        for label, matcher in criteria:
            # Identity, not equality: ANYTHING is a shared sentinel instance.
            if matcher is not ANYTHING:
                description.append_text(label)
                matcher.describe_to(description)

    def describe_mismatch(self, table: Tag, mismatch_description: Description) -> None:
        """Append the default mismatch text followed by every ``<tr>`` found in the table."""
        super().describe_mismatch(table, mismatch_description)
        mismatch_description.append_text("\n\nfound rows:\n").append_list("", "\n", "", table.find_all("tr"))
[docs]
def has_title(title: Union[str, Matcher[str]]) -> HtmlWithTag:
    """Match HTML that contains a ``<title>`` tag with the given string content.

    :param title: Expected title string, or a matcher for it.
    """
    title_content = TagWith(string=title)
    return HtmlWithTag(title_content, name="title")
[docs]
def has_named_tag(name: str, matcher: Union[str, Matcher[Tag]]) -> HtmlWithTag:
    """Match HTML containing a tag with a specific name that satisfies a matcher.

    :param name: The HTML tag name to find (e.g., 'div', 'span').
    :param matcher: Matcher applied to the found tag(s); a plain string is matched against the
        tag's string content.
    """
    return HtmlWithTag(matcher, name=name)
[docs]
def has_id_tag(id_: str, matcher: Union[str, Matcher[Tag]]) -> HtmlWithTag:
    """Match HTML containing a tag with a specific ``id`` attribute that satisfies a matcher.

    :param id_: The HTML id attribute to find.
    :param matcher: Matcher applied to the found tag(s); a plain string is matched against the
        tag's string content.
    """
    return HtmlWithTag(matcher, id_=id_)
[docs]
def tag_has_string(matcher: Union[str, Matcher[str]]) -> TagWith:
    """Match a BeautifulSoup tag whose string content satisfies the given criterion.

    :param matcher: Expected string, or a string matcher, for the tag's content.
    """
    string_constraint = TagWith(string=matcher)
    return string_constraint
[docs]
def has_class(clazz: Union[str, Matcher[str]]) -> TagWith:
    """Match a BeautifulSoup tag that carries the given CSS class.

    :param clazz: Class name, or a string matcher, that one of the entries in the tag's
        ``class`` attribute must satisfy.
    """
    class_constraint = TagWith(clazz=clazz)
    return class_constraint
[docs]
def has_table(matcher: Matcher[Tag], id_: Union[str, Matcher[str]] = ANYTHING) -> HtmlHasTable:
    """Match HTML containing a ``<table>`` element satisfying the given table matcher.

    :param matcher: A matcher to apply to the table Tag.
    :param id_: Optional matcher or string for the table's 'id' attribute.
    """
    return HtmlHasTable(matcher, id_=id_)
[docs]
def has_row(
    row_matches: Matcher[Tag] = ANYTHING,
    cells_match: Matcher[Sequence[Tag]] = ANYTHING,
    index_matches: Union[int, Matcher[int]] = ANYTHING,
    *,
    header_row: bool = False,
) -> TableHasRow:
    """Match a table Tag that contains a row satisfying the specified criteria.

    :param row_matches: Matcher for the ``<tr>`` Tag itself.
    :param cells_match: Matcher for the sequence of cells within the row.
    :param index_matches: Integer or matcher for the row's zero-based index, counted over the
        rows that contain at least one cell of the selected kind.
    :param header_row: If True, looks for ``<th>`` cells instead of ``<td>``.
    """
    return TableHasRow(
        row_matcher=row_matches,
        cells_matcher=cells_match,
        index_matcher=index_matches,
        header_row=header_row,
    )
[docs]
def has_id(id_: Union[str, Matcher[str]]) -> TagWith:
    """Match a BeautifulSoup tag whose attributes include the given ``id``.

    :param id_: Expected id string, or a matcher for it.
    """
    id_entry = has_entry("id", id_)
    return TagWith(attributes=id_entry)
[docs]
def has_attributes(
    matcher: Union[Mapping[str, Union[str, Matcher[str]]], Matcher[Mapping[str, Union[str, Matcher[str]]]]],
) -> TagWith:
    """Match a BeautifulSoup tag whose attribute mapping satisfies the given criterion.

    :param matcher: Mapping or matcher checked against the tag's attributes.
    """
    attribute_constraint = TagWith(attributes=matcher)
    return attribute_constraint
[docs]
def has_link(
    id_: Union[str, Matcher[str]] = ANYTHING,
    clazz: Union[str, Matcher[str]] = ANYTHING,
    href: Union[str, Matcher[str]] = ANYTHING,
) -> HtmlWithTag:
    """Match HTML containing an anchor (``<a>``) tag with specific attributes.

    Arguments left at the ``ANYTHING`` default place no constraint on the tag.

    :param id_: Matcher or string for the 'id' attribute.
    :param clazz: Matcher or string for the 'class' attribute.
    :param href: Matcher or string for the 'href' attribute.
    """
    # The sentinel is detected by identity so that caller-supplied values are never asked to
    # compare themselves with a matcher object.
    href_matcher: ATTR_MATCHER = ANYTHING if href is ANYTHING else has_entry("href", href)
    id_matcher: ATTR_MATCHER = ANYTHING if id_ is ANYTHING else has_entry("id", id_)
    return HtmlWithTag(TagWith(name="a", clazz=clazz, attributes=all_of(href_matcher, id_matcher)))
[docs]
def has_image(
    id_: Union[str, Matcher[str]] = ANYTHING,
    clazz: Union[str, Matcher[str]] = ANYTHING,
    src: Union[str, Matcher[str]] = ANYTHING,
) -> HtmlWithTag:
    """Match HTML containing an image (``<img>``) tag with specific attributes.

    Arguments left at the ``ANYTHING`` default place no constraint on the tag.

    :param id_: Matcher or string for the 'id' attribute.
    :param clazz: Matcher or string for the 'class' attribute.
    :param src: Matcher or string for the 'src' attribute.
    """
    # The sentinel is detected by identity so that caller-supplied values are never asked to
    # compare themselves with a matcher object.
    src_matcher: ATTR_MATCHER = ANYTHING if src is ANYTHING else has_entry("src", src)
    id_matcher: ATTR_MATCHER = ANYTHING if id_ is ANYTHING else has_entry("id", id_)
    return HtmlWithTag(TagWith(name="img", clazz=clazz, attributes=all_of(src_matcher, id_matcher)))