source of geminispace.info - the search provider for gemini space
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

130 lines
4.1 KiB

import pytest
from gus.lib.gemini import GeminiResource, GeminiRobotFileParser
class TestGeminiResource:
    """Tests for GeminiResource: link extraction, URL parsing, root detection."""

    def test_extract_contained_resources(self):
        """Check which lines of a gemtext body are reported as contained links."""
        base_url = "gemini://host"

        def extract(gemtext):
            # Build a fresh resource for every case.
            return GeminiResource(base_url).extract_contained_resources(gemtext)

        # Empty content yields nothing.
        assert extract("") == []

        # A "=>" preceded by whitespace is expected not to count as a link.
        assert extract(" => link") == []

        # Link-looking lines inside a preformatted block are expected to be
        # skipped.
        assert extract("```\n=> preformatted\n```") == []

        # Two plain links separated by a text line.
        found = extract("=> link\ntext\n=> other")
        assert len(found) == 2
        first, second = found
        assert first.raw_url == "link"
        assert second.raw_url == "other"

        # A fuller document: heading, text, links and a preformatted block
        # whose "=>" line must not be picked up.
        document = (
            "\n"
            "# title\n"
            "text\n"
            "=> link\n"
            "text\n"
            "``` preformatted\n"
            "=> no link\n"
            "```\n"
            "=> other\n"
        )
        found = extract(document)
        assert len(found) == 2
        first, second = found
        assert first.raw_url == "link"
        assert second.raw_url == "other"

    @pytest.mark.parametrize(
        "test_input,expected_result",
        [
            # Absolute URL, no further context needed.
            (
                ["gemini://gus.guru", None, None],
                [True, "gemini://gus.guru", "gus.guru"],
            ),
            # Relative references with None as the third argument are
            # expected to be invalid.
            (["/bar", "gemini://gus.guru/foo", None], [False, None, None]),
            (["/bar", "gemini://gus.guru/foo/", None], [False, None, None]),
            # Host-absolute paths replace the whole path of the second
            # argument.
            (
                ["/bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            (
                ["/bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            # Relative paths: without a trailing slash on the second
            # argument the last segment is replaced (result ".../bar", not
            # ".../foo/bar"); with a trailing slash the path is appended.
            (
                ["bar", "gemini://gus.guru/foo", "gus.guru"],
                [True, "gemini://gus.guru/bar", "gus.guru"],
            ),
            (
                ["bar", "gemini://gus.guru/foo/", "gus.guru"],
                [True, "gemini://gus.guru/foo/bar", "gus.guru"],
            ),
            # Scheme-less "//host" reference.
            (["//foo.com", None, None], [True, "gemini://foo.com", "foo.com"]),
        ],
    )
    def test_url_parsing(self, test_input, expected_result):
        """Check validity, normalized URL and normalized host of a resource.

        ``test_input`` holds the three positional arguments handed to
        ``GeminiResource``; ``expected_result`` holds the expected
        ``is_valid``, ``normalized_url`` and ``normalized_host`` values.
        """
        want_valid, want_url, want_host = expected_result
        resource = GeminiResource(*test_input)
        assert resource.is_valid == want_valid
        assert resource.normalized_url == want_url
        assert resource.normalized_host == want_host

    @pytest.mark.parametrize(
        "test_url,expected_result",
        [
            ("gemini://gus.guru", True),
            ("gemini://gus.guru/", True),
            ("gemini://gus.guru/franz", False),
            ("gemini://gus.guru/~franz", True),
            ("gemini://gus.guru/~franz/foo", False),
        ],
    )
    def test_is_root_like(self, test_url, expected_result):
        """Check ``is_root_like`` for bare hosts, ``/~name`` paths and other paths.

        The bare host (with or without a trailing slash) and a top-level
        ``/~name`` path are expected to be root-like; the other paths are not.
        """
        resource = GeminiResource(test_url)
        assert resource.is_root_like == expected_result
class TestGeminiRobotFileParser:
    """Tests for GeminiRobotFileParser.can_fetch_prioritized.

    Every case is checked with the user agents "testbot", "genericbot" and
    "*", in that order; the expectations below treat a rule for an earlier
    agent as overriding rules for later ones.
    """

    def _get_parser(self, content):
        """Return a parser that has been fed ``content`` as its robots.txt body."""
        parser = GeminiRobotFileParser("gemini://dummy/robots.txt")
        parser.read_from_string(content)
        return parser

    def _assert_fetchable(self, rp, url="/", fetchable=True):
        """Assert that ``rp`` reports ``url`` as fetchable (or not)."""
        prioritized_agents = ["testbot", "genericbot", "*"]
        verdict = rp.can_fetch_prioritized(prioritized_agents, url)
        assert verdict == fetchable

    def test_empty_robots(self):
        """An empty robots.txt allows fetching."""
        parser = self._get_parser("")
        self._assert_fetchable(parser)

    def test_disallow_star(self):
        """A wildcard disallow blocks fetching."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, fetchable=False)

    def test_allow_genericbot(self):
        """An allow for genericbot wins over a wildcard disallow."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "User-agent: genericbot\n"
            "Allow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser)

    def test_allow_genericbot_but_disallow_testbot(self):
        """A disallow for testbot wins over an allow for genericbot."""
        robots_txt = (
            "User-agent: genericbot\n"
            "Allow: /\n"
            "User-agent: testbot\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, fetchable=False)

    def test_allow_star_but_disallow_genericbot(self):
        """A disallow for genericbot wins over a wildcard allow."""
        robots_txt = (
            "User-agent: *\n"
            "Allow: /\n"
            "User-agent: genericbot\n"
            "Disallow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser, fetchable=False)

    def test_allow_only_testbot(self):
        """An allow for testbot wins over wildcard and genericbot disallows."""
        robots_txt = (
            "User-agent: *\n"
            "Disallow: /\n"
            "User-agent: genericbot\n"
            "Disallow: /\n"
            "User-agent: testbot\n"
            "Allow: /"
        )
        parser = self._get_parser(robots_txt)
        self._assert_fetchable(parser)