feat: cache package search results with Redis

The queries being done on the package search page are quite costly.
(Especially the default one ordered by "Popularity" when navigating to /packages)

Let's add the search results to the Redis cache:
Every result of a search query is being pushed to Redis until we hit our maximum of 50k.
An entry lives for 3 minutes; after that it expires and is evicted from the cache.
Lifetime and max values are configurable.

Signed-off-by: moson-mo <mo-son@mailbox.org>
This commit is contained in:
moson-mo 2023-07-02 01:06:34 +02:00
parent 7c8b9ba6bc
commit 3acfb08a0f
No known key found for this signature in database
GPG key ID: 4A4760AB4EE15296
8 changed files with 173 additions and 74 deletions

View file

@ -1,21 +1,43 @@
from redis import Redis
import pickle
from sqlalchemy import orm
from aurweb import config
from aurweb.aur_redis import redis_connection
async def db_count_cache(
redis: Redis, key: str, query: orm.Query, expire: int = None
) -> int:
_redis = redis_connection()
async def db_count_cache(key: str, query: orm.Query, expire: int = None) -> int:
"""Store and retrieve a query.count() via redis cache.
:param redis: Redis handle
:param key: Redis key
:param query: SQLAlchemy ORM query
:param expire: Optional expiration in seconds
:return: query.count()
"""
result = redis.get(key)
result = _redis.get(key)
if result is None:
redis.set(key, (result := int(query.count())))
_redis.set(key, (result := int(query.count())))
if expire:
redis.expire(key, expire)
_redis.expire(key, expire)
return int(result)
async def db_query_cache(key: str, query: orm.Query, expire: int = None):
    """Store and retrieve query results via redis cache.

    On a cache hit, the pickled rows stored under ``key`` are unpickled
    and returned. On a miss, the query is executed and its rows are
    pickled and stored under ``key`` -- unless the Redis database already
    holds more keys than the ``[cache] max_search_entries`` option
    (fallback 50000), in which case the rows are returned uncached.

    :param key: Redis key
    :param query: SQLAlchemy ORM query
    :param expire: Optional expiration in seconds; a falsy value
        (``None`` or ``0``) stores the entry without an expiry
    :return: query.all()
    """
    cached = _redis.get(key)
    if cached is None:
        # The limit is checked against the total number of keys in the
        # Redis database (DBSIZE), not only the search-result entries.
        max_entries = config.getint("cache", "max_search_entries", 50000)
        if _redis.dbsize() > max_entries:
            return query.all()

        cached = pickle.dumps(query.all())
        # Set the value and its TTL in a single SET command. `expire or
        # None` keeps a falsy expire (e.g. 0) meaning "no expiry" instead
        # of sending `EX 0`, which Redis rejects as an invalid expire time.
        _redis.set(key, cached, ex=expire or None)

    # Both the hit and the freshly-stored path return the unpickled copy.
    # NOTE(review): pickle.loads executes whatever the payload dictates;
    # this is only safe while the Redis instance is trusted and not
    # writable by outside parties -- confirm for the deployment.
    return pickle.loads(cached)

View file

@ -89,22 +89,20 @@ async def index(request: Request):
bases = db.query(models.PackageBase)
redis = aurweb.aur_redis.redis_connection()
cache_expire = 300 # Five minutes.
cache_expire = aurweb.config.getint("cache", "expiry_time")
# Package statistics.
context["package_count"] = await db_count_cache(
redis, "package_count", bases, expire=cache_expire
"package_count", bases, expire=cache_expire
)
query = bases.filter(models.PackageBase.MaintainerUID.is_(None))
context["orphan_count"] = await db_count_cache(
redis, "orphan_count", query, expire=cache_expire
"orphan_count", query, expire=cache_expire
)
query = db.query(models.User)
context["user_count"] = await db_count_cache(
redis, "user_count", query, expire=cache_expire
"user_count", query, expire=cache_expire
)
query = query.filter(
@ -114,7 +112,7 @@ async def index(request: Request):
)
)
context["trusted_user_count"] = await db_count_cache(
redis, "trusted_user_count", query, expire=cache_expire
"trusted_user_count", query, expire=cache_expire
)
# Current timestamp.
@ -130,26 +128,26 @@ async def index(request: Request):
query = bases.filter(models.PackageBase.SubmittedTS >= seven_days_ago)
context["seven_days_old_added"] = await db_count_cache(
redis, "seven_days_old_added", query, expire=cache_expire
"seven_days_old_added", query, expire=cache_expire
)
query = updated.filter(models.PackageBase.ModifiedTS >= seven_days_ago)
context["seven_days_old_updated"] = await db_count_cache(
redis, "seven_days_old_updated", query, expire=cache_expire
"seven_days_old_updated", query, expire=cache_expire
)
year = seven_days * 52 # Fifty two weeks worth: one year.
year_ago = now - year
query = updated.filter(models.PackageBase.ModifiedTS >= year_ago)
context["year_old_updated"] = await db_count_cache(
redis, "year_old_updated", query, expire=cache_expire
"year_old_updated", query, expire=cache_expire
)
query = bases.filter(
models.PackageBase.ModifiedTS - models.PackageBase.SubmittedTS < 3600
)
context["never_updated"] = await db_count_cache(
redis, "never_updated", query, expire=cache_expire
"never_updated", query, expire=cache_expire
)
# Get the 15 most recently updated packages.

View file

@ -7,6 +7,7 @@ from fastapi import APIRouter, Form, Query, Request, Response
import aurweb.filters # noqa: F401
from aurweb import aur_logging, config, db, defaults, models, util
from aurweb.auth import creds, requires_auth
from aurweb.cache import db_count_cache, db_query_cache
from aurweb.exceptions import InvariantError, handle_form_exceptions
from aurweb.models.relation_type import CONFLICTS_ID, PROVIDES_ID, REPLACES_ID
from aurweb.packages import util as pkgutil
@ -14,6 +15,7 @@ from aurweb.packages.search import PackageSearch
from aurweb.packages.util import get_pkg_or_base
from aurweb.pkgbase import actions as pkgbase_actions, util as pkgbaseutil
from aurweb.templates import make_context, make_variable_context, render_template
from aurweb.util import hash_query
logger = aur_logging.get_logger(__name__)
router = APIRouter()
@ -87,7 +89,11 @@ async def packages_get(
# Collect search result count here; we've applied our keywords.
# Including more query operations below, like ordering, will
# increase the amount of time required to collect a count.
num_packages = search.count()
# we use redis for caching the results of the query
cache_expire = config.getint("cache", "expiry_time")
num_packages = await db_count_cache(
hash_query(search.query), search.query, cache_expire
)
# Apply user-specified sort column and ordering.
search.sort_by(sort_by, sort_order)
@ -108,7 +114,12 @@ async def packages_get(
models.PackageNotification.PackageBaseID.label("Notify"),
)
packages = results.limit(per_page).offset(offset)
# paging
results = results.limit(per_page).offset(offset)
# we use redis for caching the results of the query
packages = await db_query_cache(hash_query(results), results, cache_expire)
context["packages"] = packages
context["packages_count"] = num_packages

View file

@ -4,6 +4,7 @@ import secrets
import shlex
import string
from datetime import datetime
from hashlib import sha1
from http import HTTPStatus
from subprocess import PIPE, Popen
from typing import Callable, Iterable, Tuple, Union
@ -13,6 +14,7 @@ import fastapi
import pygit2
from email_validator import EmailSyntaxError, validate_email
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Query
import aurweb.config
from aurweb import aur_logging, defaults
@ -200,3 +202,9 @@ def shell_exec(cmdline: str, cwd: str) -> Tuple[int, str, str]:
proc = Popen(args, cwd=cwd, stdout=PIPE, stderr=PIPE)
out, err = proc.communicate()
return proc.returncode, out.decode().strip(), err.decode().strip()
def hash_query(query: Query):
    """Return a SHA-1 hex digest of the SQL text rendered for *query*.

    The query's statement is compiled with ``literal_binds`` enabled, so
    bound parameter values are rendered inline in the SQL string. The
    digest is taken over that string, so it reflects the parameter
    values as well as the statement's structure.

    :param query: SQLAlchemy ORM query
    :return: hexadecimal SHA-1 digest of the compiled statement text
    """
    compiled = query.statement.compile(compile_kwargs={"literal_binds": True})
    sql_text = str(compiled)
    return sha1(sql_text.encode()).hexdigest()