Skip to content
This repository has been archived by the owner on Jun 1, 2022. It is now read-only.

Commit

Permalink
serialization: Switch to using orjson instead of json throughout.
Browse files Browse the repository at this point in the history
orjson is considerably faster than the standard-library json module[1], and it enforces UTF-8 conformance more strictly.

[1] https://github.com/ijl/orjson#performance
  • Loading branch information
alexmv committed May 17, 2021
1 parent c731778 commit c33b0d0
Show file tree
Hide file tree
Showing 26 changed files with 166 additions and 157 deletions.
7 changes: 3 additions & 4 deletions scripts/dev_copy_locations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json

import click
import httpx
import orjson
from click.exceptions import ClickException


Expand Down Expand Up @@ -53,7 +52,7 @@ def yield_locations(base_url, source_token):
"GET", base_url, headers={"Authorization": "Bearer {}".format(source_token)}
) as response:
for line in response.iter_lines():
properties = json.loads(line)["properties"]
properties = orjson.loads(line)["properties"]
yield {
key: properties[key]
for key in (
Expand Down Expand Up @@ -86,7 +85,7 @@ def import_batch(batch, destination_url, destination_token):
print(response.text)
raise ClickException(e)
click.echo(response.status_code)
click.echo(json.dumps(response.json(), indent=2))
click.echo(orjson.dumps(response.json(), option=orjson.OPT_INDENT_2))


if __name__ == "__main__":
Expand Down
9 changes: 4 additions & 5 deletions scripts/dev_copy_source_locations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json

import click
import httpx
import orjson
from click.exceptions import ClickException


Expand Down Expand Up @@ -60,7 +59,7 @@ def yield_source_locations(base_url, source_token):
"GET", base_url, headers={"Authorization": "Bearer {}".format(source_token)}
) as response:
for line in response.iter_lines():
properties = json.loads(line)["properties"]
properties = orjson.loads(line)["properties"]
# We just want source_uid, source_name, name, latitude, longitude, import_json
yield {
key: properties[key]
Expand All @@ -78,7 +77,7 @@ def yield_source_locations(base_url, source_token):
def import_batch(batch, destination_url, destination_token, import_run_id):
response = httpx.post(
destination_url + "?import_run_id={}".format(import_run_id),
data="\n".join(json.dumps(record) for record in batch),
data=b"\n".join(orjson.dumps(record) for record in batch),
headers={"Authorization": "Bearer {}".format(destination_token)},
timeout=20,
)
Expand All @@ -88,7 +87,7 @@ def import_batch(batch, destination_url, destination_token, import_run_id):
print(response.text)
raise ClickException(e)
click.echo(response.status_code)
click.echo(json.dumps(response.json(), indent=2))
click.echo(orjson.dumps(response.json(), option=orjson.OPT_INDENT_2))


if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions scripts/replay_api_logs_from_csv.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import csv
import json
import time
from collections import defaultdict
from urllib.parse import urlencode

import click
import httpx
import orjson
import pytz
from dateutil import parser

Expand Down Expand Up @@ -41,8 +41,8 @@ def cli(endpoint, csv_filepath, base_url):
status, data = send_row(row, url)
status_count[status] += 1
if status != 200:
click.echo(json.dumps(data, indent=2), err=True)
click.echo(json.dumps(status_count))
click.echo(orjson.dumps(data, option=orjson.OPT_INDENT_2), err=True)
click.echo(orjson.dumps(status_count))


def send_row(row, url):
Expand All @@ -57,7 +57,7 @@ def send_row(row, url):
"fake_remote_ip": row["remote_ip"],
}
)
payload = json.loads(row["payload"])
payload = orjson.loads(row["payload"])
response = None
errors = []
for i in range(1, 4):
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ extend-ignore = E203,E501
extend-exclude = vaccinate/config/pythonrc.py,vaccinate/config/test_settings.py,vaccinate/config/mypy_settings.py

[pycodestyle]
ignore = E501
ignore = E501,W503
4 changes: 2 additions & 2 deletions vaccinate/api/caller_views/submit_report.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import json
import os
import random
from datetime import date, datetime
from typing import Any, Callable, Dict, List, Optional, Tuple

import beeline
import orjson
import pytz
import requests
from api.models import ApiLog
Expand Down Expand Up @@ -67,7 +67,7 @@ def submit_report(
request: HttpRequest, on_request_logged: Callable[[Callable[[ApiLog], None]], None]
):
try:
post_data = json.loads(request.body.decode("utf-8"))
post_data = orjson.loads(request.body)
except ValueError as e:
return JsonResponse({"error": str(e)}, status=400)
try:
Expand Down
12 changes: 8 additions & 4 deletions vaccinate/api/caller_views/test_submit_report.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
import pathlib
import random
import time
from datetime import datetime

import orjson
import pytest
from api.models import ApiLog
from core.models import CallRequest, CallRequestReason, Location, Report, State
Expand Down Expand Up @@ -51,7 +51,10 @@ def test_submit_report_api_invalid_json(client, jwt_id_token):
HTTP_AUTHORIZATION="Bearer {}".format(jwt_id_token),
)
assert response.status_code == 400
assert response.json()["error"] == "Expecting value: line 1 column 1 (char 0)"
assert (
response.json()["error"]
== "expected value at line 1 column 1: line 1 column 1 (char 0)"
)


@pytest.mark.django_db
Expand All @@ -66,7 +69,8 @@ def test_submit_report_api_example(
"ok": True,
},
)
fixture = json.load(json_path.open())
with json_path.open() as fixture_file:
fixture = orjson.loads(fixture_file.read())
assert Report.objects.count() == 0
assert CallRequest.objects.count() == 0
# Ensure location exists
Expand Down Expand Up @@ -179,7 +183,7 @@ def test_submit_report_api_example(

# Should have posted to Zapier
assert mocked_zapier.called_once
assert json.loads(mocked_zapier.last_request.body) == {
assert orjson.loads(mocked_zapier.last_request.body) == {
"report_url": "http://testserver/admin/core/report/{}/change/".format(
report.pk
),
Expand Down
9 changes: 5 additions & 4 deletions vaccinate/api/export_mapbox.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json

import beeline
import orjson
import requests
from core.expansions import VaccineFinderInventoryExpansion
from core.models import Location
Expand Down Expand Up @@ -156,7 +155,7 @@ def export_mapbox_preview(request):
<p><a href="{}">Raw JSON</a></p>
</body></html>
""".format(
escape(json.dumps(preview, indent=4)),
escape(orjson.dumps(preview, option=orjson.OPT_INDENT_2)),
escape(raw_url),
).strip()
)
Expand All @@ -175,7 +174,9 @@ def export_mapbox(request):

post_data = ""
for location in locations.all():
post_data += json.dumps(_mapbox_geojson(location, expansion)) + "\n"
post_data += orjson.dumps(
_mapbox_geojson(location, expansion), option=orjson.OPT_APPEND_NEWLINE
)

access_token = settings.MAPBOX_ACCESS_TOKEN
if not access_token:
Expand Down
12 changes: 8 additions & 4 deletions vaccinate/api/search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import datetime
import json
from html import escape
from typing import Callable, Dict, Union

import beeline
import orjson
from core.models import ConcordanceIdentifier, Location, SourceLocation, State
from core.utils import keyset_pagination_iterator
from django.contrib.gis.geos import Point
Expand Down Expand Up @@ -118,9 +118,9 @@ def search_locations(
if debug:
if all:
return JsonResponse({"error": "Cannot use both all and debug"}, status=400)
output = "".join(stream())
output = b"".join(stream())
if formatter.content_type == "application/json":
output = json.dumps(json.loads(output), indent=2)
output = orjson.dumps(orjson.loads(output), option=orjson.OPT_INDENT_2)
return render(
request,
"api/search_locations_debug.html",
Expand Down Expand Up @@ -295,7 +295,11 @@ def source_location_json(source_location: SourceLocation) -> Dict[str, object]:
"api/search_locations_debug.html",
{
"output": mark_safe(
escape(json.dumps(json.loads("".join(stream())), indent=2))
escape(
orjson.dumps(
orjson.loads(b"".join(stream())), option=orjson.OPT_INDENT_2
)
)
),
},
)
Expand Down
60 changes: 31 additions & 29 deletions vaccinate/api/serialize.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import itertools
import json
from collections import namedtuple
from typing import Dict

import beeline
import orjson
from core.expansions import VaccineFinderInventoryExpansion
from core.models import Location
from django.db.models.query import QuerySet
Expand Down Expand Up @@ -211,34 +211,34 @@ def transform_batch_geojson(batch):
formats["v0preview"] = OutputFormat(
prepare_queryset=lambda qs: qs.select_related("dn_latest_non_skip_report"),
start=(
'{"usage": {"notice": "Please contact Vaccinate The States and let '
"us know if you plan to rely on or publish this data. This "
"data is provided with best-effort accuracy. If you are "
"displaying this data, we expect you to display it responsibly. "
'Please do not display it in a way that is easy to misread.",'
'"contact": {"partnersEmail": "[email protected]"}},'
'"content": ['
b'{"usage":{"notice":"Please contact Vaccinate The States and let '
b"us know if you plan to rely on or publish this data. This "
b"data is provided with best-effort accuracy. If you are "
b"displaying this data, we expect you to display it responsibly. "
b'Please do not display it in a way that is easy to misread.",'
b'"contact":{"partnersEmail":"[email protected]"}},'
b'"content":['
),
transform=lambda l: location_v0_json(l),
transform_batch=transform_batch,
serialize=json.dumps,
separator=",",
end=lambda qs: "]}",
serialize=orjson.dumps,
separator=b",",
end=lambda qs: b"]}",
content_type="application/json",
)
formats["v0preview-geojson"] = OutputFormat(
prepare_queryset=lambda qs: qs.select_related("dn_latest_non_skip_report"),
start=(
'{"type": "FeatureCollection", "usage": USAGE,'.replace(
"USAGE", json.dumps(VTS_USAGE)
b'{"type":"FeatureCollection","usage":USAGE,'.replace(
b"USAGE", orjson.dumps(VTS_USAGE)
)
+ '"features": ['
+ b'"features":['
),
transform=lambda l: to_geojson(location_v0_json(l)),
transform_batch=transform_batch_geojson,
serialize=json.dumps,
separator=",",
end=lambda qs: "]}",
serialize=orjson.dumps,
separator=b",",
end=lambda qs: b"]}",
content_type="application/json",
)
return formats
Expand All @@ -248,32 +248,34 @@ def make_formats(json_convert, geojson_convert):
return {
"json": OutputFormat(
prepare_queryset=lambda qs: qs,
start='{"results": [',
start=b'{"results":[',
transform=lambda l: json_convert(l),
transform_batch=lambda batch: batch,
serialize=json.dumps,
separator=",",
end=lambda qs: '], "total": TOTAL}'.replace("TOTAL", str(qs.count())),
serialize=orjson.dumps,
separator=b",",
end=lambda qs: b'],"total":TOTAL}'.replace(
b"TOTAL", str(qs.count()).encode("ascii")
),
content_type="application/json",
),
"geojson": OutputFormat(
prepare_queryset=lambda qs: qs,
start='{"type": "FeatureCollection", "features": [',
start=b'{"type":"FeatureCollection","features":[',
transform=lambda l: geojson_convert(l),
transform_batch=lambda batch: batch,
serialize=json.dumps,
separator=",",
end=lambda qs: "]}",
serialize=orjson.dumps,
separator=b",",
end=lambda qs: b"]}",
content_type="application/json",
),
"nlgeojson": OutputFormat(
prepare_queryset=lambda qs: qs,
start="",
start=b"",
transform=lambda l: geojson_convert(l),
transform_batch=lambda batch: batch,
serialize=json.dumps,
separator="\n",
end=lambda qs: "",
serialize=orjson.dumps,
separator=b"\n",
end=lambda qs: b"",
content_type="text/plain",
),
}
Expand Down
17 changes: 8 additions & 9 deletions vaccinate/api/test_create_location_from_source_location.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import json
import pathlib

import orjson
from core.models import ConcordanceIdentifier, Location, SourceLocation
from reversion.models import Revision


def test_create_location_from_source_location(client, api_key):
fixture = json.load(
(
pathlib.Path(__file__).parent
/ "test-data"
/ "importSourceLocations"
/ "002-new-location.json"
).open()
)
with (
pathlib.Path(__file__).parent
/ "test-data"
/ "importSourceLocations"
/ "002-new-location.json"
).open() as fixture_file:
fixture = orjson.loads(fixture_file.read())
source_location = SourceLocation.objects.create(
source_uid=fixture["source_uid"],
source_name=fixture["source_name"],
Expand Down
Loading

0 comments on commit c33b0d0

Please sign in to comment.