fix(rpc): improve type=search performance

This patch uses .with_entities on our RPC search query so that
only the columns needed for RPC output are selected. This primarily
fixes performance issues we were seeing with large queries.

That being said, we do see a bit of a slowdown on RPC queries
that return a large number of records, but it's quite negligible
at this point.

We still aim to perform better than the older PHP
implementation, so this is by no means the final patch
in this area.

Signed-off-by: Kevin Morris <kevr@0cost.org>
This commit is contained in:
Kevin Morris 2022-01-13 23:27:10 -08:00
parent d31a51742b
commit b4495a49bf
No known key found for this signature in database
GPG key ID: F7E46DED420788F3
2 changed files with 45 additions and 28 deletions

View file

@ -4,7 +4,7 @@ from collections import defaultdict
from typing import Any, Callable, Dict, List, NewType, Union
from fastapi.responses import HTMLResponse
from sqlalchemy import and_, literal
from sqlalchemy import and_, literal, orm
import aurweb.config as config
@ -123,34 +123,27 @@ class RPC:
# Produce RPC API compatible Popularity: If zero, it's an integer
# 0, otherwise, it's formatted to the 6th decimal place.
pop = package.PackageBase.Popularity
pop = package.Popularity
pop = 0 if not pop else float(util.number_format(pop, 6))
snapshot_uri = config.get("options", "snapshot_uri")
data = defaultdict(list)
data.update({
return {
"ID": package.ID,
"Name": package.Name,
"PackageBaseID": package.PackageBaseID,
"PackageBase": package.PackageBase.Name,
"PackageBase": package.PackageBaseName,
# Maintainer should be set following this update if one exists.
"Maintainer": None,
"Maintainer": package.Maintainer,
"Version": package.Version,
"Description": package.Description,
"URL": package.URL,
"URLPath": snapshot_uri % package.Name,
"NumVotes": package.PackageBase.NumVotes,
"NumVotes": package.NumVotes,
"Popularity": pop,
"OutOfDate": package.PackageBase.OutOfDateTS,
"FirstSubmitted": package.PackageBase.SubmittedTS,
"LastModified": package.PackageBase.ModifiedTS
})
if package.PackageBase.Maintainer is not None:
# We do have a maintainer: set the Maintainer key.
data["Maintainer"] = package.PackageBase.Maintainer.Username
return data
"OutOfDate": package.OutOfDateTS,
"FirstSubmitted": package.SubmittedTS,
"LastModified": package.ModifiedTS
}
def _get_info_json_data(self, package: models.Package) -> Dict[str, Any]:
data = self._get_json_data(package)
@ -178,19 +171,38 @@ class RPC:
:param packages: A list of Package instances or a Package ORM query
:param data_generator: Generator callable of single-Package JSON data
"""
output = []
for pkg in packages:
db.refresh(pkg)
output.append(data_generator(pkg))
return output
return [data_generator(pkg) for pkg in packages]
def _entities(self, query: orm.Query) -> orm.Query:
    """
    Restrict `query` to the specific columns used for RPC output.

    Columns are taken from Package, PackageBase and User.
    PackageBase.Name is labeled "PackageBaseName" and User.Username
    is labeled "Maintainer"; the result is grouped by Package.ID.

    NOTE(review): presumably `query` must already join PackageBase
    and User for these columns to resolve -- confirm against callers.

    :param query: ORM query to select RPC columns on
    :return: `query` with its entities replaced and grouping applied
    """
    # Short aliases for the models we select from.
    package = models.Package
    pkgbase = models.PackageBase

    columns = (
        # Package columns.
        package.ID,
        package.Name,
        package.Version,
        package.Description,
        package.URL,
        package.PackageBaseID,
        # PackageBase columns; Name is labeled to avoid clashing
        # with Package.Name in the result row.
        pkgbase.Name.label("PackageBaseName"),
        pkgbase.NumVotes,
        pkgbase.Popularity,
        pkgbase.OutOfDateTS,
        pkgbase.SubmittedTS,
        pkgbase.ModifiedTS,
        # Maintainer's username, exposed as `Maintainer`.
        models.User.Username.label("Maintainer"),
    )

    selected = query.with_entities(*columns)
    return selected.group_by(package.ID)
def _handle_multiinfo_type(self, args: List[str] = [], **kwargs) \
-> List[Dict[str, Any]]:
self._enforce_args(args)
args = set(args)
packages = db.query(models.Package).join(models.PackageBase).filter(
models.Package.Name.in_(args))
packages = db.query(models.Package).join(models.PackageBase).join(
models.User,
models.User.ID == models.PackageBase.MaintainerUID,
isouter=True
).filter(models.Package.Name.in_(args))
packages = self._entities(packages)
ids = {pkg.ID for pkg in packages}
# Aliases for 80-width.
@ -293,7 +305,7 @@ class RPC:
search.search_by(by, arg)
max_results = config.getint("options", "max_rpc_results")
results = search.results().limit(max_results)
results = self._entities(search.results()).limit(max_results)
return self._assemble_json_data(results, self._get_json_data)
def _handle_msearch_type(self, args: List[str] = [], **kwargs)\