source of geminispace.info - the search provider for gemini space
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

33 lines
1.1 KiB

from whoosh.fields import TEXT
from whoosh.index import open_dir
from whoosh.query import Every
from gus.lib.gemini import GeminiResource
from gus.lib.whoosh_extensions import UrlAnalyzer
def main():
    """Add a ``domain`` field to the index and backfill it for every document.

    Opens the Whoosh index in the ``index`` directory, registers a new
    ``domain`` text field analyzed with ``UrlAnalyzer``, then replaces each
    existing document with a copy that also carries the normalized host of
    its URL in that field. Each computed domain is printed as it is processed.
    """
    ix = open_dir("index")

    # The schema change is committed on its own so the new field exists
    # before any document that uses it is written.
    with ix.writer() as writer:
        writer.add_field("domain", TEXT(analyzer=UrlAnalyzer()))

    # One writer covers the whole backfill. Committing once per document is
    # slow, and every intermediate commit may merge segments, after which the
    # document numbers held by the already-open searcher are no longer a
    # reliable argument for ``delete_document``. The writer is opened first
    # so the searcher opened right after it reads the same index generation.
    with ix.writer() as writer, ix.searcher() as searcher:
        for result in searcher.search(Every(), limit=None):
            domain = GeminiResource(result["url"]).normalized_host
            print(domain)
            # Replace the stored document with one that includes the domain.
            writer.delete_document(result.docnum)
            writer.add_document(
                url=result["url"],
                domain=domain,
                content_type=result["content_type"],
                # "content" and "prompt" are not stored on every document.
                content=result["content"] if "content" in result else None,
                prompt=result["prompt"] if "prompt" in result else None,
                indexed_at=result["indexed_at"],
            )


if __name__ == "__main__":
    main()