Browse Source

[threads] Only work with textual pages

remotes/src/master
Natalie Pendragon 10 months ago
parent
commit
c5b0648dcc
  1. 3
      scripts/build_threads.py

3
scripts/build_threads.py

@@ -37,6 +37,7 @@ ON p_to.id == l.to_page_id
WHERE p_from.url IN (?, ?)
AND p_to.normalized_url != ?
AND c.status == 20
AND p_to.content_type LIKE 'text/%'
GROUP BY p_to.normalized_url
ORDER BY l.is_cross_host_like, p_to.url ASC""", u, f"{u}/", resource.normalized_url)
found_threadable_parents = False
@@ -124,6 +125,7 @@ ON p_to.id == l.to_page_id
WHERE p_to.url IN (""" + ", ".join(["?" for x in range(len(from_urls))]) + """)
AND p_from.normalized_url != ?
AND c.status == 20
AND p_from.content_type LIKE 'text/%'
GROUP BY p_from.normalized_url
ORDER BY l.is_cross_host_like, first_seen ASC""", *from_urls, resource.normalized_url)
threadable_child_index = 1
@@ -181,6 +183,7 @@ LEFT JOIN threadpage AS tp
ON tp.page_id == p.id
WHERE tp.page_id IS NULL
AND c.status == 20
AND p.content_type LIKE 'text/%'
GROUP BY p.normalized_url
""")
for page in pages_query.iterator():

Loading…
Cancel
Save