ripgrep
yaml-to-sqlite/tests/test_cli.py
1 from click.testing import CliRunner
2 from yaml_to_sqlite import cli
3 import sqlite_utils
29
30
31 def test_without_pk(tmpdir):
32 db_path = tmpdir / "db.db"
33 assert (
44
45
46 def test_with_pk(tmpdir):
47 db_path = tmpdir / "db.db"
48 assert (
59
60
61 def test_single_column(tmpdir):
62 db_path = tmpdir / "db.db"
63 test_yaml = "- One\n" "- Two\n" "- Three\n"
64 assert (
65 0
68 cli.cli,
69 [str(db_path), "numbers", "-", "--single-column", "name"],
70 input=test_yaml,
71 )
72 .exit_code
78
79
80 def test_alters_if_necessary(tmpdir):
81 db_path = tmpdir / "db.db"
82 assert (
yaml-to-sqlite/setup.py
24 packages=find_packages(),
25 install_requires=["click", "PyYAML", "sqlite-utils>=3.9.1"],
26 setup_requires=["pytest-runner"],
27 extras_require={"test": ["pytest"]},
28 entry_points="""
29 [console_scripts]
30 yaml-to-sqlite=yaml_to_sqlite.cli:cli
31 """,
32 tests_require=["yaml-to-sqlite[test]"],
33 url="https://github.com/simonw/yaml-to-sqlite",
34 classifiers=[
whosonfirst-datasette/Dockerfile
1 FROM datasetteproject/datasette
# NOTE(review): `apt` is not intended for non-interactive/scripted use; `apt-get update
# && apt-get install -y --no-install-recommends ... && rm -rf /var/lib/apt/lists/*`
# would be the conventional form and keep the image smaller — confirm before changing.
2 RUN apt update && apt install -y wget bzip2
# Bake the demo fixtures database into the image at build time.
3 RUN wget -q https://latest.datasette.io/fixtures.db
# Pre-computed inspect step is deliberately left disabled (commented out);
# the mounted whosonfirst database is inspected at runtime instead.
4 # RUN datasette inspect --inspect-file=/mnt/whosonfirst/inspect.json /fixtures.db /mnt/whosonfirst/whosonfirst-data-latest.db
5 EXPOSE 8001
# Serve the volume-mounted whosonfirst database alongside the baked-in fixtures,
# listening on all interfaces with CORS enabled.
6 CMD ["datasette", "-h", "0.0.0.0", "-p", "8001", "/mnt/whosonfirst/whosonfirst-data-latest.db", "/fixtures.db", "--cors"]
vaccinate-ca-datasette/metadata.yml
5 about_url: https://github.com/simonw/vaccinate-ca-datasette
6 description_html: |-
7 <a href="/vaccinateca/locations?_facet=Affiliation&_facet=Latest+report+yes%3F&Latest+report+yes%3F=1&_facet_array=Availability+Info">Latest places that reported yes, on a map</a>
twitter-to-sqlite/setup.py
37 "python-dateutil",
38 ],
39 extras_require={"test": ["pytest"]},
40 tests_require=["twitter-to-sqlite[test]"],
41 )
twitter-to-sqlite/tests/test_save_tweets.py
2 import pathlib
3
4 import pytest
5 import sqlite_utils
6 from twitter_to_sqlite import utils
7
8
9 @pytest.fixture
10 def tweets():
11 return json.load(open(pathlib.Path(__file__).parent / "tweets.json"))
12
13
14 @pytest.fixture
15 def db(tweets):
16 db = sqlite_utils.Database(memory=True)
19
20
21 def test_tables(db):
22 assert {
23 "sources",
52
53
54 def test_users(db):
55 user_rows = list(db["users"].rows)
56 assert [
184
185
186 def test_tweets(db):
187 tweet_rows = list(db["tweets"].rows)
188 assert [
315
316
317 def test_sources(db):
318 source_rows = list(db["sources"].rows)
319 assert [
341
342
343 def test_places(db):
344 place_rows = list(db["places"].rows)
345 assert [
359
360
361 def test_media(db):
362 media_rows = list(db["media"].rows)
363 media_tweets_rows = list(db["media_tweets"].rows)
twitter-to-sqlite/tests/test_migrations.py
1 import sqlite_utils
2 from click.testing import CliRunner
3 import sqlite_utils
4 from twitter_to_sqlite import cli, migrations
5
6 from .test_import import zip_contents_path
7 from .test_save_tweets import db, tweets
8
9
10 def test_no_migrations_on_first_run(tmpdir, zip_contents_path):
11 output = str(tmpdir / "output.db")
12 args = ["import", output, str(zip_contents_path / "follower.js")]
21
22
23 def test_convert_source_column():
24 db = sqlite_utils.Database(memory=True)
25 db["tweets"].insert_all(
51
52
53 def test_convert_source_column_against_real_database(db):
54 assert "migrations" not in db.table_names()
55 migrations.convert_source_column(db)
twitter-to-sqlite/tests/test_import.py
2 import pathlib
3
4 import pytest
5 import sqlite_utils
6 from click.testing import CliRunner
7 from twitter_to_sqlite import cli
8
10
11
12 @pytest.fixture
13 def zip_contents_path():
14 return pathlib.Path(__file__).parent / "zip_contents"
15
16
17 @pytest.fixture
18 def import_test_zip(tmpdir, zip_contents_path):
19 archive = str(tmpdir / "archive.zip")
20 buf = io.BytesIO()
25
26
27 def test_create_zip(zip_contents_path):
28 zf = create_zip(zip_contents_path)
29 assert {
37
38
39 def test_cli_import_zip_file(import_test_zip):
40 tmpdir, archive = import_test_zip
41 output = str(tmpdir / "output.db")
42 result = CliRunner().invoke(cli.cli, ["import", output, archive])
46
47
48 def test_cli_import_folder(tmpdir, zip_contents_path):
49 output = str(tmpdir / "output.db")
50 result = CliRunner().invoke(cli.cli, ["import", output, str(zip_contents_path)])
54
55
56 def test_cli_import_specific_files(tmpdir, zip_contents_path):
57 output = str(tmpdir / "output.db")
58 result = CliRunner().invoke(
109
110
111 def test_deletes_existing_archive_tables(import_test_zip):
112 tmpdir, archive = import_test_zip
113 output = str(tmpdir / "output.db")
114 db = sqlite_utils.Database(output)
todomvc-datasette/js/template.js
20
21 var escape = function (string) {
22 return (string && reHasUnescapedHtml.test(string))
23 ? string.replace(reUnescapedHtml, escapeHtmlChar)
24 : string;
todomvc-datasette/js/store.js
1 /*jshint eqeqeq:false */
2 // Signed token for the todomvc user on https://latest.datasette.io/
3 // See https://github.com/simonw/todomvc-datasette/issues/2
4 let TOKEN = 'dstok_eyJhIjoidG9kb212YyIsInRva2VuIjoiZHN0b2siLCJ0IjoxNjY5OTQ0NjgzfQ.wzSKl10MouQaKZ0FuTKEftNJko4';
13 let self = this;
14 self._dbName = `todo_${name}`;
15 fetch("https://latest.datasette.io/ephemeral/-/create", {
16 method: "POST",
17 mode: "cors",
48 }
49 fetch(
50 `https://latest.datasette.io/ephemeral/${
51 this._dbName
52 }.json?_shape=array&_sort_desc=id&completed=${
76 callback = callback || function () {};
77 fetch(
78 `https://latest.datasette.io/ephemeral/${self._dbName}.json?_shape=array&_sort_desc=id`,
79 {
80 mode: "cors",
107 if (id) {
108 fetch(
109 `https://latest.datasette.io/ephemeral/${self._dbName}/${id}/-/update`,
110 {
111 method: "POST",
124 } else {
125 // Save it and store ID
126 fetch(`https://latest.datasette.io/ephemeral/${self._dbName}/-/insert`, {
127 method: "POST",
128 mode: "cors",
151 Store.prototype.remove = function (id, callback) {
152 fetch(
153 `https://latest.datasette.io/ephemeral/${this._dbName}/${id}/-/delete`,
154 {
155 method: "POST",
todomvc-datasette/README.md
1 # TodoMVC demo backed by Datasette
2
3 This is a modified version of the [Vanilla JavaScript TodoMVC Example](https://github.com/tastejs/todomvc/tree/gh-pages/examples/vanillajs) adapted to demonstrate the [Datasette 1.0 alpha JSON API](https://docs.datasette.io/en/latest/changelog.html#a0-2022-11-29).
4
5 To try this out, visit https://todomvc.datasette.io/
til/yaml/yamlfmt.md
9 They suggest this:
10 ```bash
11 go install github.com/google/yamlfmt/cmd/yamlfmt@latest
12 ```
13 This worked on my machine because I had Go installed via Homebrew (`go version go1.20.4 darwin/arm64` according to `go version`).
35 types: [created]
36 jobs:
37 test:
38 runs-on: ubuntu-latest
39 strategy:
40 matrix:
71
72 jobs:
73 test:
74 runs-on: ubuntu-latest
75 strategy:
76 matrix:
til/typescript/basic-tsc.md
22 % ./node_modules/.bin/tsc --init
23
24 Next step: create a `.ts` file to start testing it out. I put the following in `greetings.ts`:
25
26 ```typescript
til/sqlite/unix-timestamp-milliseconds-sqlite.md
16 )
17 ```
18 [Try these both here](https://latest.datasette.io/_memory?sql=select%0D%0A++strftime%28%27%25s%27%2C+%27now%27%29+as+seconds_since_epoch%2C%0D%0A++cast%28%28julianday%28%27now%27%29+-+2440587.5%29+*+86400+*+1000+as+integer%29+as+ms_since_epoch%3B).
19
20 In SQLite 3.42.0 and higher you can do this instead:
30 select strftime('%Y-%m-%d %H:%M:%S', :timestamp_ms / 1000, 'unixepoch')
31 ```
32 The output looks like this: `2023-04-09 05:04:24` - [try that out here](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25Y-%25m-%25d+%25H%3A%25M%3A%25S%27%2C+%3Atimestamp_ms+%2F+1000%2C+%27unixepoch%27%29%0D%0A×tamp_ms=1681016664769).
33
34 ## Why not multiply seconds by 1000?
86 ) as timestamp_ms
87 ```
88 [Try that here](https://latest.datasette.io/_memory?sql=select%0D%0A++(1000+*+(strftime(%27%25s%27%2C+%27now%27)))+%2B+cast(%0D%0A++++substr(%0D%0A++++++strftime(%27%25f%27%2C+%27now%27)%2C%0D%0A++++++instr(strftime(%27%25f%27%2C+%27now%27)%2C+%27.%27)+%2B+1%0D%0A++++)+as+integer%0D%0A++)+as+timestamp_ms).
89
90 The `substr('18.413', instr('18.413', '.') + 1)` part returns just the characters after the first `.` character, which are then cast to integer to get the milliseconds fraction of that second. These are added to `1000 * ` the unix timestamp in seconds.
til/sqlite/track-timestamped-changes-to-a-table.md
75 INSERT INTO foo VALUES ('hello4');
76 ```
77 To test this I ran `sqlite3` (with no arguments, which provides an in-memory database to play with), pasted in the above and then ran this:
78 ```
79 sqlite> .headers on
til/sqlite/text-value-is-integer-or-float.md
17 The `|| '.0'` bit there is needed because `cast('1' as REAL)` returns `1.0`, not just `1`.
18
19 (Note that `1.200` will not pass this test and will be incorrectly considered an invalid floating point representation)
20
21 ## Demos
42 )
43 ```
44 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+REAL%29+AS+TEXT%29+in+%28value%2C+value+%7C%7C+%27.0%27%29+as+is_valid_float%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
45
46 | value | is_valid_float |
71 )
72 ```
73 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++value%2C%0D%0A++cast%28cast%28value+AS+INTEGER%29+AS+TEXT%29+%3D+value+as+is_valid_int%0D%0Afrom%0D%0A++%28%0D%0A++++select%0D%0A++++++%271%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%271.1%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++%27dog%27+as+value%0D%0A++++union%0D%0A++++select%0D%0A++++++null+as+value%0D%0A++%29)
74
75 | value | is_valid_int |
til/sqlite/subqueries-in-select.md
66 10
67 ```
68 [Try that here](https://latest-with-plugins.datasette.io/github?sql=select%0D%0A++repos.full_name%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++max%28created_at%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+max_created_at%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++count%28*%29%0D%0A++++from%0D%0A++++++releases%0D%0A++++where%0D%0A++++++repo+%3D+repos.id%0D%0A++%29+as+releases_count%2C%0D%0A++%28%0D%0A++++select%0D%0A++++++json_group_array%28%0D%0A++++++++json_object%28%0D%0A++++++++++%27id%27%2C%0D%0A++++++++++id%2C%0D%0A++++++++++%27name%27%2C%0D%0A++++++++++name%2C%0D%0A++++++++++%27created_at%27%2C%0D%0A++++++++++created_at%0D%0A++++++++%29%0D%0A++++++%29%0D%0A++++from%0D%0A++++++%28%0D%0A++++++++select%0D%0A++++++++++*%0D%0A++++++++from%0D%0A++++++++++releases%0D%0A++++++++where%0D%0A++++++++++repo+%3D+repos.id%0D%0A++++++++order+by%0D%0A++++++++++created_at+desc%0D%0A++++++++limit%0D%0A++++++++++3%0D%0A++++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom%0D%0A++repos%0D%0Aorder+by%0D%0A++releases_count+desc%0D%0Alimit%0D%0A++10).
69
70 Here are the first three rows of the output. The `recent_releases` column includes a JSON array with details of the three most recent releases for each of those repositories.
214 limit 10;
215 ```
216 [Try that out here](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc%0D%0Alimit+10%3B).
217
218 The key trick here is the `rank() over` line:
221 rank() over (partition by repos.id order by releases.created_at desc) as rel_rank
222 ```
223 This adds an integer called `rel_rank` to each row in that CTE showing the relative ranking of each release, ordered by their created date. [This version of the query](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28*%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect+*+from+cte) shows the contents of the `cte` table, which starts like this:
224
225 | full_name | max_created_at | releases_count | rel_id | rel_name | rel_created_at | rel_rank |
257 left join releases on releases.repo = repos.id
258 ```
259 This almost has the desired effect, but if [you run it](https://latest-with-plugins.datasette.io/github?sql=with+cte+as+(%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max(releases.created_at)+over+(partition+by+repos.id)+as+max_created_at%2C%0D%0A++++count(*)+over+(partition+by+repos.id)+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank()+over+(partition+by+repos.id+order+by+releases.created_at+desc)+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A)%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array(%0D%0A++++json_object(%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++)%0D%0A++)+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc) you'll notice that it returns rows that look like this:
260
261 | full_name | max_created_at | releases_count | recent_releases |
320 order by releases_count desc
321 ```
322 [Try that finished query here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++max%28releases.created_at%29+over+%28partition+by+repos.id%29+as+max_created_at%2C%0D%0A++++count%28releases.id%29+over+%28partition+by+repos.id%29+as+releases_count%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max_created_at%2C%0D%0A++releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null%29+as+recent_releases%0D%0Afrom+cte%0D%0Awhere+rel_rank+%3C%3D+3%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
323
324 ## Simplified to use just one window function
353 order by releases_count desc
354 ```
355 [Try that version here](https://latest-with-plugins.datasette.io/github?sql=%0D%0Awith+cte+as+%28%0D%0A++select%0D%0A++++repos.full_name%2C%0D%0A++++releases.created_at%2C%0D%0A++++releases.id+as+rel_id%2C%0D%0A++++releases.name+as+rel_name%2C%0D%0A++++releases.created_at+as+rel_created_at%2C%0D%0A++++rank%28%29+over+%28partition+by+repos.id+order+by+releases.created_at+desc%29+as+rel_rank%0D%0A++from+repos%0D%0A++++left+join+releases+on+releases.repo+%3D+repos.id%0D%0A%29%0D%0Aselect%0D%0A++full_name%2C%0D%0A++max%28created_at%29+as+max_created_at%2C%0D%0A++count%28rel_id%29+as+releases_count%2C%0D%0A++json_group_array%28%0D%0A++++json_object%28%0D%0A++++++%27id%27%2C+rel_id%2C%0D%0A++++++%27name%27%2C+rel_name%2C%0D%0A++++++%27created_at%27%2C+rel_created_at%0D%0A++++%29%0D%0A++%29+filter+%28where+rel_id+is+not+null+and+rel_rank+%3C%3D+3%29+as+recent_releases%0D%0Afrom+cte%0D%0Agroup+by+full_name%0D%0Aorder+by+releases_count+desc).
356
357 The trick here is to select the `created_at` and `releases.id` columns in the initial CTE, then use `max(created_at) as max_created_at` and `count(rel_id) as releases_count` in the second section of the query, in order to total things up based on the group by.
til/sqlite/steampipe.md
17 Let's start with the Hacker News API plugin, [turbot/steampipe-plugin-hackernews](https://github.com/turbot/steampipe-plugin-hackernews). I like this one because it doesn't require an API key.
18
19 First grab the [latest release](https://github.com/turbot/steampipe-plugin-hackernews/releases/latest) of the extension. I'm on an M2 MacBook so I grabbed the `steampipe_sqlite_hackernews.darwin_arm64.tar.gz` file:
20
21 ```bash
82 Once you've jumped through the security hoops to enable an extension it can be used directly with Datasette as well. Let's try two at once - the Hacker News one and the [crt.sh](https://hub.steampipe.io/plugins/turbot/crtsh) plugin for querying certificate transparency logs.
83
84 Download [the latest steampipe-plugin-crtsh](https://github.com/turbot/steampipe-plugin-crtsh/releases/latest) file - for macOS I used:
85
86 ```bash
til/sqlite/sqlite-tg.md
128 I decided to try implementing a version of that on top of `sqlite-tg`.
129
130 I grabbed the latest release of `timezones.geojson.zip` from [evansiroky/timezone-boundary-builder/](https://github.com/evansiroky/timezone-boundary-builder/releases) and unzipped it to get a `combined.geojson` file that starts like this:
131
132 ```json
til/sqlite/splitting-commas-sqlite.md
25 | x12 | y1234 | z12345 |
26
27 Here's [a live demo of the query](https://latest.datasette.io/fixtures?sql=with+comma_locations+as+%28%0D%0A++select+instr%28%3Apath%2C+%27%2C%27%29+as+first_comma%2C%0D%0A++instr%28%3Apath%2C+%27%2C%27%29+%2B+instr%28substr%28%3Apath%2C+instr%28%3Apath%2C+%27%2C%27%29+%2B+1%29%2C+%27%2C%27%29+as+second_comma%0D%0A%29%2C+variables+as+%28%0D%0A++select%0D%0A++++substr%28%3Apath%2C+0%2C+first_comma%29+as+first%2C%0D%0A++++substr%28%3Apath%2C+first_comma+%2B+1%2C+second_comma+-+first_comma+-+1%29+as+second%2C%0D%0A++++substr%28%3Apath%2C+second_comma+%2B+1%29+as+third%0D%0A++from+comma_locations%0D%0A%29%0D%0Aselect+*+from+variables&path=x12%2Cy1234%2Cz12345).
til/sqlite/simple-recursive-cte.md
10 select * from counter limit 5;
11 ```
12 This query [returns five rows](https://latest.datasette.io/_memory?sql=with+recursive+counter%28x%29+as+%28%0D%0A++select+0%0D%0A++++union%0D%0A++select+x+%2B+1+from+counter%0D%0A%29%0D%0Aselect+*+from+counter+limit+10%3B) from a single column `x` - from 0 to 4.
13
14 | x |
til/sqlite/replicating-rqlite.md
5 By default `rqlite` asks you to use its own custom HTTP API - but I wanted to try running it against Datasette. rqlite author Philip O'Toole confirmed that [this should work](https://twitter.com/general_order24/status/1343619601758908419) provided any writes go through the API - each node can be configured to write to an on-disk database file which Datasette can then read from (the default is to use in-memory databases and an on-disk Raft log).
6
7 Here's how I got that working on my macOS laptop. I used the latest macOS binary from https://github.com/rqlite/rqlite/releases (`rqlite` is written in Go and provides pre-compiled binaries for different systems).
8
9 cd /tmp
til/sqlite/related-content.md
16 ```sql
17 select title, rank from til_fts where til_fts match '
18 i OR wanted OR to OR run OR some OR django OR tests OR using OR
19 pytestdjango OR and OR with OR configured OR pick OR up OR the
20 OR databaseurl OR environment OR variable OR via OR djdatabaseurl
21 OR against OR a OR postgresql OR server OR running OR in OR
25 ```
26
27 Here are the results from [that query](https://til.simonwillison.net/tils?sql=select+title%2C+rank+from+til_fts+where+til_fts+match+%27%0D%0A++i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR%0D%0A++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the%0D%0A++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl%0D%0A++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR%0D%0A++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR%0D%0A++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service%27%0D%0Aorder+by+rank+limit+5). Unsurprisingly the entry itself shows up first, but the other items look relevant enough to me:
28
29 title | rank
30 -- | --
31 Running tests against PostgreSQL in a service container | -61.04335068286244
32 Talking to a PostgreSQL service container from inside a Docker container | -37.54518907167069
33 Allowing a container in Docker Desktop for Mac to talk to a PostgreSQL server on the host machine | -29.712660785491842
60 5
61 ```
62 And [an example of it running](https://til.simonwillison.net/tils?sql=select%0D%0A++til.topic%2C+til.slug%2C+til.title%2C+til.created%0D%0Afrom%0D%0A++til%0D%0A++join+til_fts+on+til.rowid+%3D+til_fts.rowid%0D%0Awhere%0D%0A++til_fts+match+%3Awords%0D%0A++and+not+(%0D%0A++++til.slug+%3D+%3Aslug%0D%0A++++and+til.topic+%3D+%3Atopic%0D%0A++)%0D%0Aorder+by%0D%0A++til_fts.rank%0D%0Alimit%0D%0A++5&words=i+OR+wanted+OR+to+OR+run+OR+some+OR+django+OR+tests+OR+using+OR+++pytestdjango+OR+and+OR+with+OR+configured+OR+pick+OR+up+OR+the+++OR+databaseurl+OR+environment+OR+variable+OR+via+OR+djdatabaseurl+++OR+against+OR+a+OR+postgresql+OR+server+OR+running+OR+in+OR+++github+OR+actions+OR+it+OR+took+OR+while+OR+figure+OR+out+OR+++right+OR+pattern+OR+trick+OR+was+OR+define+OR+postgres+OR+service&slug=postgresq-service-container&topic=github-actions), which returns the following:
63
64 topic | slug | title | created
til/sqlite/pragma-function-list.md
33 select json_extract('{"foo": "bar", "bar": "baz"}', '$.foo', '$.bar') as bits
34 ```
35 This [returns](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%22foo%22%3A+%22bar%22%2C+%22bar%22%3A+%22baz%22%7D%27%2C+%27%24.foo%27%2C+%27%24.bar%27%29+as+bits) a JSON array corresponding to each argument past the first one:
36
37 ```json
68 pragma_function_list()
69 ```
70 [Run against a Datasette instance](https://latest.datasette.io/_memory?sql=select%0D%0A++name%2C%0D%0A++narg%2C%0D%0A++flags%2C%0D%0A++type%2C%0D%0A++flags+%26+0x000000800+%21%3D+0+as+deterministic%2C%0D%0A++flags+%26+0x000080000+%21%3D+0+as+directonly%2C%0D%0A++flags+%26+0x000100000+%21%3D+0+as+subtype%2C%0D%0A++flags+%26+0x000200000+%21%3D+0+as+innocuous%0D%0Afrom%0D%0A++pragma_function_list%28%29) returns the following data (truncated to the highlights):
71
72 | name | narg | flags | type | deterministic | directonly | subtype | innocuous |
til/sqlite/one-line-csv-operations.md
13 'SELECT passenger_count, COUNT(*), AVG(total_amount) FROM taxi GROUP BY passenger_count'
14
15 You can get `taxi.csv` by downloading the compressed file from [here](https://github.com/multiprocessio/dsq/blob/43e72ff1d2c871082fed0ae401dd59e2ff9f6cfe/testdata/taxi.csv.7z) and running:
16
17 7z e -aos taxi.csv.7z
til/sqlite/now-argument-stability.md
18 That `strftime()` pattern is [described here](https://stackoverflow.com/questions/17574784/sqlite-current-timestamp-with-milliseconds/56895050#56895050) and [in this TIL](https://til.simonwillison.net/sqlite/track-timestamped-changes-to-a-table), it returns the current Unix timestamp in milliseconds.
19
20 The [result of the above query](https://latest.datasette.io/_memory?sql=select+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29+as+t1%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29%0D%0Aunion+all%0D%0Aselect+strftime%28%27%25s%27%2C%27now%27%29+%7C%7C+substr%28strftime%28%27%25f%27%2C%27now%27%29%2C4%29) is:
21
22 | t1 |
til/sqlite/list-all-columns-in-a-database.md
3 Here's a devious trick for listing ALL columns in a SQLite database, using a SQL query that generates another SQL query.
4
5 The first query ([demo](https://latest.datasette.io/fixtures?sql=select+group_concat%28%0D%0A++%22select+%27%22+%7C%7C+name+%7C%7C+%22%27+as+table_name%2C+*+from+pragma_table_info%28%27%22+%7C%7C+name+%7C%7C+%22%27%29%22%0D%0A%2C+%27+union+%27%29+%7C%7C+%27+order+by+table_name%2C+cid%27%0D%0A++from+sqlite_master+where+type+%3D+%27table%27%3B)):
6
7 ```sql
11 from sqlite_master where type = 'table';
12 ```
13 This outputs the second query, which will look something like this ([demo](https://latest.datasette.io/fixtures?sql=select+%27simple_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27simple_primary_key%27%29+union+select+%27primary_key_multiple_columns%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns%27%29+union+select+%27primary_key_multiple_columns_explicit_label%27+as+table_name%2C+*+from+pragma_table_info%28%27primary_key_multiple_columns_explicit_label%27%29+union+select+%27compound_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_primary_key%27%29+union+select+%27compound_three_primary_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27compound_three_primary_keys%27%29+union+select+%27foreign_key_references%27+as+table_name%2C+*+from+pragma_table_info%28%27foreign_key_references%27%29+union+select+%27sortable%27+as+table_name%2C+*+from+pragma_table_info%28%27sortable%27%29+union+select+%27no_primary_key%27+as+table_name%2C+*+from+pragma_table_info%28%27no_primary_key%27%29+union+select+%27123_starts_with_digits%27+as+table_name%2C+*+from+pragma_table_info%28%27123_starts_with_digits%27%29+union+select+%27Table+With+Space+In+Name%27+as+table_name%2C+*+from+pragma_table_info%28%27Table+With+Space+In+Name%27%29+union+select+%27table%2Fwith%2Fslashes.csv%27+as+table_name%2C+*+from+pragma_table_info%28%27table%2Fwith%2Fslashes.csv%27%29+union+select+%27complex_foreign_keys%27+as+table_name%2C+*+from+pragma_table_info%28%27complex_foreign_keys%27%29+union+select+%27custom_foreign_key_label%27+as+table_name%2C+*+from+pragma_table_info%28%27custom_foreign_key_label%27%29+union+select+%27units%27+as+table_name%2C+*+from+pragma_table_info%28%27units%27%29+union+select+%27tags%27+as+table_name%2C+*+from+pragma_table_info%28%27tags%27%29+union+select+%27searchable%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable%27%29+union+select+%27searchable_tags%27+as+table_name%2C+*+from+pragma_table_i
nfo%28%27searchable_tags%27%29+union+select+%27searchable_fts%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts%27%29+union+select+%27searchable_fts_content%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_content%27%29+union+select+%27searchable_fts_segments%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segments%27%29+union+select+%27searchable_fts_segdir%27+as+table_name%2C+*+from+pragma_table_info%28%27searchable_fts_segdir%27%29+union+select+%27select%27+as+table_name%2C+*+from+pragma_table_info%28%27select%27%29+union+select+%27infinity%27+as+table_name%2C+*+from+pragma_table_info%28%27infinity%27%29+union+select+%27facet_cities%27+as+table_name%2C+*+from+pragma_table_info%28%27facet_cities%27%29+union+select+%27facetable%27+as+table_name%2C+*+from+pragma_table_info%28%27facetable%27%29+union+select+%27binary_data%27+as+table_name%2C+*+from+pragma_table_info%28%27binary_data%27%29+union+select+%27roadside_attractions%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attractions%27%29+union+select+%27attraction_characteristic%27+as+table_name%2C+*+from+pragma_table_info%28%27attraction_characteristic%27%29+union+select+%27roadside_attraction_characteristics%27+as+table_name%2C+*+from+pragma_table_info%28%27roadside_attraction_characteristics%27%29+order+by+table_name%2C+cid)):
14 ```sql
15 select 'simple_primary_key' as table_name, * from pragma_table_info('simple_primary_key') union
156 sqlite_master.name
157 ```
158 [Demo](https://latest.datasette.io/fixtures?sql=select%0D%0A++sqlite_master.name+as+table_name%2C%0D%0A++table_info.*%0D%0Afrom%0D%0A++sqlite_master%0D%0A++join+pragma_table_info%28sqlite_master.name%29+as+table_info%0D%0Aorder+by%0D%0A++sqlite_master.name%2C%0D%0A++table_info.cid).
159
160 This works with the `pragma_table_info` and `pragma_index_list` and `pragma_foreign_key_list` functions too.
178 columnName
179 ```
180 [Demo](https://latest.datasette.io/fixtures?sql=SELECT+m.name+as+tableName%2C+p.name+as+columnName%0D%0A++++++++FROM+sqlite_master+m%0D%0A++++++++LEFT+OUTER+JOIN+pragma_table_info%28%28m.name%29%29+p+ON+m.name+%3C%3E+p.name%0D%0A++++++++WHERE+m.type+IN+%28%27table%27%2C%27view%27%29+AND+m.name+NOT+LIKE+%27sqlite_%25%27%0D%0A++++++++ORDER+BY+tableName%2C+columnName).
til/sqlite/ld-preload.md
44 cd SQLite-cf538e27
45
46 Now we can build the extension. The `CPPFLAGS` are optional but I found I needed them to get the full Datasette test suite to pass later on:
47
48 CPPFLAGS="-DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE=1" ./configure
142 ```
143
144 ## Running the Datasette tests
145
146 To run Datasette's test suite I needed to install a few extra dependencies:
147
148 ```
154 source venv/bin/activate
155 pip install wheel # So bdist_wheel works in next step
156 pip install -e '.[test]'
157 LD_PRELOAD=/tmp/SQLite-cf538e27/.libs/libsqlite3.so python3 -c
158 ```
til/sqlite/json-extract-path.md
32 }
33 ```
34 - `$.creatures` returns the JSON array ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures))
35 - `$.creatures[0].name` returns `Cleo` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B0%5D.name))
36 - `$.creatures[1]."weight.lb"` returns `1.6` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B1%5D.%22weight.lb%22))
37
38 ## \#-1 to access arrays by index from the end
44 But... you can apply a single integer subtraction operation to that `#` - so you can return the name of the last creature in the array using this:
45
46 - `$.creatures[#-1].name` returns `Azi` ([demo](https://latest.datasette.io/_memory?sql=select+json_extract%28%27%7B%0D%0A++++%22creatures%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Cleo%22%2C%0D%0A++++++++++++%22species%22%3A+%22dog%22%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22name%22%3A+%22Azi%22%2C%0D%0A++++++++++++%22species%22%3A+%22chicken%22%2C%0D%0A++++++++++++%22weight.lb%22%3A+1.6%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D%27%2C+%3Apath%29&path=%24.creatures%5B%23-1%5D.name))
47
48 Here's [the commit](https://sqlite.org/src/info/35ed68a651f) that added that custom SQLite extension in 2019.
54 `$.has\" quotes in it`
55
56 For example ([demo](https://latest.datasette.io/_memory?sql=select+json_extract(%27%7B%0D%0A++++%22has%5C%22+quotes+in+it%22:+%22hello%22%0D%0A%7D%27,+%27$.has%5C%22+quotes+in+it%27)&path=$.has%5C%22+quotes+in+it)):
57
58 ```sql
65 ## Source code
66
67 The latest source code for the JSON module can be found in [ext/misc/json.c](https://www3.sqlite.org/src/file?name=ext/misc/json.c) - in particular the `static JsonNode *jsonLookup(...)` function.
69
70 The unit tests are really useful - those are spread across these six files:
71
72 - [test/json1.test](https://www3.sqlite.org/src/file?name=test/json1.test)
73 - [test/json101.test](https://www3.sqlite.org/src/file?name=test/json101.test)
74 - [test/json102.test](https://www3.sqlite.org/src/file?name=test/json102.test)
75 - [test/json103.test](https://www3.sqlite.org/src/file?name=test/json103.test)
76 - [test/json104.test](https://www3.sqlite.org/src/file?name=test/json104.test)
77 - [test/json105.test](https://www3.sqlite.org/src/file?name=test/json105.test) - this one has the tests for `[#]` syntax.
til/sqlite/function-list.md
14 select * from pragma_function_list()
15 ```
16 [Here's that for latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+*+from+pragma_function_list%28%29) (truncated):
17
18 | name | builtin | type | enc | narg | flags |
30 select "'" || group_concat(name, "', '") || "'" from pragma_function_list()
31 ```
32 Run [against latest.datasette.io](https://latest.datasette.io/fixtures?sql=select+%22%27%22+||+group_concat(name%2C+%22%27%2C+%27%22)+||+%22%27%22+from+pragma_function_list()) this returned the following:
33
34 `'pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5'`
36 ## Comparing via copy-and-paste
37
38 To see the functions that were registered for https://sqlite-extension-examples.fly.dev/ but not for https://latest.datasette.io/ I used the above output to construct the following query:
39
40 `select name from pragma_function_list() where name not in ('pow', 'group_concat', 'group_concat', 'json_type', 'json_type', 'julianday', 'ntile', 'nullif', 'sqlite_compileoption_get', 'json_valid', 'json_quote', 'json_patch', '->', 'json_array', 'current_timestamp', 'power', 'sqlite_compileoption_used', 'json_remove', 'json_object', 'json_insert', '->>', 'sin', 'sum', 'quote', 'printf', 'likelihood', 'json_replace', 'json_extract', 'last_value', 'rank', 'sign', 'sqrt', 'sinh', 'tan', 'round', 'round', 'rtrim', 'rtrim', 'nth_value', 'tanh', 'random', 'trim', 'trim', 'time', 'radians', 'trunc', 'total', 'substr', 'substr', 'replace', 'upper', 'subtype', 'typeof', 'load_extension', 'load_extension', 'soundex', 'json_group_array', 'avg', 'abs', 'json_group_object', 'json_array_length', 'json_array_length', 'strftime', 'atan', 'asin', 'acos', 'substring', 'substring', 'randomblob', 'unicode', 'percent_rank', 'row_number', 'atanh', 'asinh', 'acosh', 'cos', 'atan2', 'last_insert_rowid', 'sqlite_log', 'unlikely', 'cosh', 'ceil', 'char', 'unixepoch', 'exp', 'count', 'count', 'date', 'ceiling', 'total_changes', 'changes', 'sqlite_version', 'degrees', 'floor', 'coalesce', 'glob', 'zeroblob', 'hex', 'iif', 'sqlite_source_id', 'format', 'datetime', 'cume_dist', 'ln', 'instr', 'json', 'dense_rank', 'log', 'log', 'ifnull', 'current_date', 'current_time', 'lag', 'lag', 'lag', 'mod', 'log2', 'like', 'like', 'max', 'max', 'min', 'min', 'lead', 'lead', 'lead', 'log10', 'lower', 'ltrim', 'ltrim', 'first_value', 'pi', 'length', 'likely', 'json_set', 'escape_fts', 'prepare_connection_args', 'convert_units', 'sleep', 'rtreedepth', 'match', 'snippet', 'fts5_source_id', 'offsets', 'matchinfo', 'matchinfo', 'optimize', 'rtreecheck', 'rtreenode', 'highlight', 'bm25', 'fts3_tokenizer', 'fts3_tokenizer', 'fts5')`
41
42 This [returned](https://sqlite-extension-examples.fly.dev/data?sql=select+name+from+pragma_function_list%28%29+where+name+not+in+%28%27pow%27%2C+%27group_concat%27%2C+%27group_concat%27%2C+%27json_type%27%2C+%27json_type%27%2C+%27julianday%27%2C+%27ntile%27%2C+%27nullif%27%2C+%27sqlite_compileoption_get%27%2C+%27json_valid%27%2C+%27json_quote%27%2C+%27json_patch%27%2C+%27-%3E%27%2C+%27json_array%27%2C+%27current_timestamp%27%2C+%27power%27%2C+%27sqlite_compileoption_used%27%2C+%27json_remove%27%2C+%27json_object%27%2C+%27json_insert%27%2C+%27-%3E%3E%27%2C+%27sin%27%2C+%27sum%27%2C+%27quote%27%2C+%27printf%27%2C+%27likelihood%27%2C+%27json_replace%27%2C+%27json_extract%27%2C+%27last_value%27%2C+%27rank%27%2C+%27sign%27%2C+%27sqrt%27%2C+%27sinh%27%2C+%27tan%27%2C+%27round%27%2C+%27round%27%2C+%27rtrim%27%2C+%27rtrim%27%2C+%27nth_value%27%2C+%27tanh%27%2C+%27random%27%2C+%27trim%27%2C+%27trim%27%2C+%27time%27%2C+%27radians%27%2C+%27trunc%27%2C+%27total%27%2C+%27substr%27%2C+%27substr%27%2C+%27replace%27%2C+%27upper%27%2C+%27subtype%27%2C+%27typeof%27%2C+%27load_extension%27%2C+%27load_extension%27%2C+%27soundex%27%2C+%27json_group_array%27%2C+%27avg%27%2C+%27abs%27%2C+%27json_group_object%27%2C+%27json_array_length%27%2C+%27json_array_length%27%2C+%27strftime%27%2C+%27atan%27%2C+%27asin%27%2C+%27acos%27%2C+%27substring%27%2C+%27substring%27%2C+%27randomblob%27%2C+%27unicode%27%2C+%27percent_rank%27%2C+%27row_number%27%2C+%27atanh%27%2C+%27asinh%27%2C+%27acosh%27%2C+%27cos%27%2C+%27atan2%27%2C+%27last_insert_rowid%27%2C+%27sqlite_log%27%2C+%27unlikely%27%2C+%27cosh%27%2C+%27ceil%27%2C+%27char%27%2C+%27unixepoch%27%2C+%27exp%27%2C+%27count%27%2C+%27count%27%2C+%27date%27%2C+%27ceiling%27%2C+%27total_changes%27%2C+%27changes%27%2C+%27sqlite_version%27%2C+%27degrees%27%2C+%27floor%27%2C+%27coalesce%27%2C+%27glob%27%2C+%27zeroblob
%27%2C+%27hex%27%2C+%27iif%27%2C+%27sqlite_source_id%27%2C+%27format%27%2C+%27datetime%27%2C+%27cume_dist%27%2C+%27ln%27%2C+%27instr%27%2C+%27json%27%2C+%27dense_rank%27%2C+%27log%27%2C+%27log%27%2C+%27ifnull%27%2C+%27current_date%27%2C+%27current_time%27%2C+%27lag%27%2C+%27lag%27%2C+%27lag%27%2C+%27mod%27%2C+%27log2%27%2C+%27like%27%2C+%27like%27%2C+%27max%27%2C+%27max%27%2C+%27min%27%2C+%27min%27%2C+%27lead%27%2C+%27lead%27%2C+%27lead%27%2C+%27log10%27%2C+%27lower%27%2C+%27ltrim%27%2C+%27ltrim%27%2C+%27first_value%27%2C+%27pi%27%2C+%27length%27%2C+%27likely%27%2C+%27json_set%27%2C+%27escape_fts%27%2C+%27prepare_connection_args%27%2C+%27convert_units%27%2C+%27sleep%27%2C+%27rtreedepth%27%2C+%27match%27%2C+%27snippet%27%2C+%27fts5_source_id%27%2C+%27offsets%27%2C+%27matchinfo%27%2C+%27matchinfo%27%2C+%27optimize%27%2C+%27rtreecheck%27%2C+%27rtreenode%27%2C+%27highlight%27%2C+%27bm25%27%2C+%27fts3_tokenizer%27%2C+%27fts3_tokenizer%27%2C+%27fts5%27%29) the following list:
43
44 ```
86 from pragma_function_list()
87 ```
88 [Try that against latest.datasette.io](https://latest.datasette.io/_memory?sql=select+json_group_array%28distinct+name%29%0D%0Afrom+pragma_function_list%28%29)
89
90 Output:
til/sqlite/cte-values.md
11 select * from x
12 ```
13 [Try that here](https://latest.datasette.io/fixtures?sql=with+x%28c1%2C+c2%2C+c3%29+as+%28%0D%0A++values%0D%0A++++%28%27a%27%2C+%27b%27%2C+3%29%2C%0D%0A++++%28%27b%27%2C+%27c%27%2C+4%29%0D%0A%29%0D%0Aselect+*+from+x).
14
15 The output of this query is:
til/sqlite/cr-sqlite-macos.md
14
15 1. Install a SQLite version that isn't the macOS default, which doesn't allow loading extensions. I used `brew install sqlite3` for this.
16 2. Download the [latest release](https://github.com/vlcn-io/cr-sqlite/releases) - at time of writing for my M2 Mac that's [v0.15.1/crsqlite-darwin-aarch64.dylib](https://github.com/vlcn-io/cr-sqlite/releases/download/v0.15.1/crsqlite-darwin-aarch64.dylib).
17 3. `cd` to the same folder as the download, then run:
18 ```bash
til/sqlite/column-combinations.md
32 num_rows desc
33 ```
34 [Try that here](https://latest.datasette.io/fixtures?sql=select%0D%0A++++case+when+%5Bpk%5D+is+not+null+then+%27pk%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcreated%5D+is+not+null+then+%27created%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bplanet_int%5D+is+not+null+then+%27planet_int%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bon_earth%5D+is+not+null+then+%27on_earth%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bstate%5D+is+not+null+then+%27state%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_city_id%5D+is+not+null+then+%27_city_id%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5B_neighborhood%5D+is+not+null+then+%27_neighborhood%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Btags%5D+is+not+null+then+%27tags%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bcomplex_array%5D+is+not+null+then+%27complex_array%2C+%27+else+%27%27+end+%7C%7C%0D%0A++++case+when+%5Bdistinct_some_null%5D+is+not+null+then+%27distinct_some_null%2C+%27+else+%27%27+end%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5Bfacetable%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc).
35
36 This has the desired effect: it gives me back all of the combinations of not-null columns in the table, with a count for each one.
53 num_rows desc' as query from pragma_table_info(:table)
54 ```
55 [Try that out](https://latest-with-plugins.datasette.io/fixtures?sql=select+%27select%0D%0A%27+%7C%7C+group_concat%28%27++++case+when+%5B%27+%7C%7C+name+%7C%7C+%27%5D+is+not+null+then+%27+%7C%7C+quote%28name+%7C%7C+%27%2C+%27%29+%7C%7C+%27+else+%27%27%27%27+end%27%2C+%27+%7C%7C%0D%0A%27%29+%7C%7C+%27%0D%0A++as+columns%2C%0D%0A++count%28*%29+as+num_rows%0D%0Afrom%0D%0A++%5B%27+%7C%7C+%3Atable+%7C%7C+%27%5D%0D%0Agroup+by%0D%0A++columns%0D%0Aorder+by%0D%0A++num_rows+desc%27+as+query+from+pragma_table_info%28%3Atable%29&table=facetable) in a demo that includes the [datasette-query-links](https://datasette.io/plugins/datasette-query-links) plugin.
56 This takes `:table` as an input and generates SQL which can be used to generate column-combination counts.
til/sqlite/build-specific-sqlite-pysqlite-macos.md
1 # Building a specific version of SQLite with pysqlite on macOS/Linux
2
3 I wanted the ability to test my Python software against specific version of SQLite on macOS. I found a way to do that using [pysqlite3](https://github.com/coleifer/pysqlite3).
4
5 First, clone the GitHub mirror of SQLite (so I don't have to learn how to use Fossil):
31 python3 setup.py build_static build
32
33 The end result sits in a `pysqlite3` folder in, on my machine, `/tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9` - test it like this:
34
35 cd /tmp/pysqlite3/build/lib.macosx-10.15-x86_64-3.9
54 pip install /tmp/pysqlite3/dist/pysqlite3-0.4.6-cp39-cp39-macosx_10_15_x86_64.whl
55
56 This exact same process works on Linux too (tested inside a default GitHub Actions Linux worker).
til/sqlite/blob-literals.md
7 ```
8
9 This was while writing a unit test for `datasette-media` - for [issue #19](https://github.com/simonw/datasette-media/issues/19). I used it in the test [here](https://github.com/simonw/datasette-media/blob/2cf64d949ccb8cd5f34b24aeb41b2a91de14cdd2/tests/test_media.py#L292-L295).
10
11 The SQLite documentation for [Literal values](https://www.sqlite.org/lang_expr.html#literal_values_constants_) explains how to do this:
til/sql/consecutive-groups.md
227 start desc;
228 ```
229 Grouping by `country, grp` returns a single row per group. The `min(created)` and `max(created)` functions can then find the earliest and latest dates in each group.
230
231 I'm using `date()` to turn those `created` timestamps into `YYYY-MM-DD` dates.
til/sphinx/sphinx-autodoc.md
1 # Adding Sphinx autodoc to a project, and configuring Read The Docs to build it
2
3 I built a [new API reference page](https://sqlite-utils.datasette.io/en/latest/reference.html) today for `sqlite-utils`, using the Sphinx [autodoc extension](https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html) to extract docstrings from the code and use them to build a full class reference.
4
5 I've avoided this kind of documentation in the past because I think narrative prose is a *much* better way of providing documentation - but ``sqlite-utils`` already has [detailed narrative prose](https://sqlite-utils.datasette.io/en/stable/python-api.html), so I felt that adding reference documentation powered by docstrings could enhance that project - while also providing better inline document for tools such as Visual Studio Code and Jupyter.
143 ## The end result
144
145 The new page of documentation is now live at [en/latest/reference.html](https://sqlite-utils.datasette.io/en/latest/reference.html). The pull request in which I figured this all out is [sqlite-utils/pull/312](https://github.com/simonw/sqlite-utils/pull/312).
til/sphinx/literalinclude-with-markers.md
1 # literalinclude with markers for showing code in documentation
2
3 I [wanted to include](https://github.com/simonw/datasette/issues/1830) some example Python tests in the Datasette documentation - but since they were tests, I also wanted to execute them as part of my test suite to make sure they worked correctly.
4
5 I solved this with the Sphinx [literalinclude directive](https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude).
6
7 Here's what I put in my [docs/testing_plugins.rst#L92-L111](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/docs/testing_plugins.rst#L92-L111) file:
8 ```rst
9 A simple test looks like this:
10
11 .. literalinclude:: ../tests/test_docs.py
12 :language: python
13 :start-after: # -- start test_homepage --
14 :end-before: # -- end test_homepage --
15
16 Or for a JSON API:
17
18 .. literalinclude:: ../tests/test_docs.py
19 :language: python
20 :start-after: # -- start test_actor_is_null --
21 :end-before: # -- end test_actor_is_null --
22
23 To make requests as an authenticated actor, create a signed ``ds_cookie`` using the ``datasette.client.actor_cookie()`` helper function and pass it in ``cookies=`` like this:
24
25 .. literalinclude:: ../tests/test_docs.py
26 :language: python
27 :start-after: # -- start test_signed_cookie_actor --
28 :end-before: # -- end test_signed_cookie_actor --
29 ```
30 Note that the paths like `../tests/test_docs.py` are relative to the root `docs/` folder, which is a sibling of `tests/`.
31
32 Then in [tests/test_docs.py](https://github.com/simonw/datasette/blob/0f63cb83ed31753a9bd9ec5cc71de16906767337/tests/test_docs.py#L109-L141):
33 ```python
34 # fmt: off
35
36 # -- start test_homepage --
37 @pytest.mark.asyncio
38 async def test_homepage():
39 ds = Datasette(memory=True)
40 response = await ds.client.get("/")
41 html = response.text
42 assert "<h1>" in html
43 # -- end test_homepage --
44
45 # -- start test_actor_is_null --
46 @pytest.mark.asyncio
47 async def test_actor_is_null():
48 ds = Datasette(memory=True)
49 response = await ds.client.get("/-/actor.json")
50 assert response.json() == {"actor": None}
51 # -- end test_actor_is_null --
52
53 # -- start test_signed_cookie_actor --
54 @pytest.mark.asyncio
55 async def test_signed_cookie_actor():
56 ds = Datasette(memory=True)
57 cookies = {"ds_actor": ds.client.actor_cookie({"id": "root"})}
58 response = await ds.client.get("/-/actor.json", cookies=cookies)
59 assert response.json() == {"actor": {"id": "root"}}
60 # -- end test_signed_cookie_actor --
61 ```
62 The rendered documentation [can be seen here](https://docs.datasette.io/en/latest/testing_plugins.html#using-datasette-client-in-tests).
63
64 The `# fmt: off` line at the start is an instruction to [Black](https://github.com/psf/black) to ignore that section of the file. Without that, running Black would insist on adding newlines before the closing `# -- end` comments, which would show up [as blank space in the documentation](https://github.com/simonw/datasette/issues/2231).
til/sphinx/blacken-docs.md
23
24 ```python
25 @pytest.fixture
26 def datasette(tmp_path_factory):
27 # This fixture will be executed repeatedly for every test
28 ```
29 This is because of the missing function body. It turns out adding `...` (which looks prettier than `pass`) fixes this issue:
30 ```python
31 @pytest.fixture
32 def datasette(tmp_path_factory):
33 # This fixture will be executed repeatedly for every test
34 ...
35 ```
til/spatialite/minimal-spatialite-database-in-python.md
1 # Creating a minimal SpatiaLite database with Python
2
3 When writing a test for [datasette-leaflet-freedraw](https://github.com/simonw/datasette-leaflet-freedraw) I realized I didn't have a simple tiny recipe for creating an in-memory SpatiaLite database in Python. I came up with this:
4
5 ```python
26 /usr/local/lib/mod_spatialite.dylib
27 ```
28 I also remembered I have this script: [build_small_spatialite_db.py](https://github.com/simonw/datasette/blob/main/tests/build_small_spatialite_db.py)
til/spatialite/knn.md
1 # KNN queries with SpatiaLite
2
3 The latest version of SpatiaLite adds KNN support, which makes it easy to efficiently answer the question "what are the X closest records to this point".
4
5 The USGS earthquakes GeoJSON is a great dataset for experimenting with these features.
9 `https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson` currently contains 10,642 features.
10
11 To turn that into a SpatiaLite database using the latest version of [geojson-to-sqlite](https://github.com/simonw/geojson-to-sqlite):
12 ```bash
13 curl 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson' | \
til/shot-scraper/axe-core.md
24 ```json
25 {
26 "testEngine": {
27 "name": "axe-core",
28 "version": "4.7.2"
29 },
30 "testRunner": {
31 "name": "axe"
32 },
33 "testEnvironment": {
34 "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/115.0.5790.75 Safari/537.36",
35 "windowWidth": 1280,
til/selenium/selenium-python-macos.md
92 print(firefox.find_element_by_css_selector('body').text)
93 ```
94 I used `wget` for the download (rather than clicking the link in my browser) thanks to the warning here: https://firefox-source-docs.mozilla.org/testing/geckodriver/Notarization.html
95
96 An easier option: install it with Homebrew:
til/readthedocs/stable-docs.md
1 # Updating stable docs in ReadTheDocs without pushing a release
2
3 I use [ReadTheDocs](https://readthedocs.org/) for several of my projects. It's fantastic: among other things, it makes it easy to publish the documentation for my latest `main` branch at `/latest/` and the documentation for my latest release at `/stable/` (as well as maintain archived tag URLs for every prior release).
4
5 I can then configure the main page of my project's documentation to redirect to `/stable/` by default.
30 After some [extensive experimentation](https://github.com/simonw/simonw-readthedocs-experiments/issues/1) (that's an issue thread with 43 comments, all by me) I've found a solution.
31
32 The basic shape is this: rather than having ReadTheDocs serve `/stable/` from the latest tagged release of my project, I instead maintain a `stable` branch in the GitHub repository. It's this branch that becomes the default documentation on my documentation sites.
33
34 Then I use GitHub Actions to automate the process of updating that branch. In particular:
59 jobs:
60 update_stable_docs:
61 runs-on: ubuntu-latest
62 steps:
63 - name: Checkout repository
214 docs/cli.rst
215 sqlite_utils/cli.py
216 tests/test_cli.py
217 ```
218
249 Here's the sequence of steps.
250
251 - Go to the project's admin page, click advanced settings and switch the default version from "stable" to "latest" - then scroll to the very bottom of the page to find the save button. This step is necessary because you can't delete the "stable" version if it is set as the default.
252 - Visit the Versions tab, edit the "stable" version and uncheck the "active" box. This is the same thing as deleting it.
253 - Find the "stable" branch on that page and activate that instead. This will trigger a build of your `stable` branch and cause it to be hosted at `/stable/` on ReadTheDocs.
til/readthedocs/pip-install-docs.md
10 # ...
11 extras_require={
12 "test": ["pytest", "pytest-asyncio", "black", "cogapp", "ruff"],
13 "docs": [
14 "sphinx==7.2.6",
til/readthedocs/link-from-latest-to-stable.md
1 # Linking from /latest/ to /stable/ on Read The Docs
2
3 [Read The Docs](https://readthedocs.org/) has a handy feature where documentation for older versions will automatically link to the latest release, for example [on this page](https://docs.datasette.io/en/0.56/spatialite.html):
4
5 <img width="978" alt="A documentation page with a note that says: You are not reading the most recent version of this documentation. 0.60 is the latest version available." src="https://user-images.githubusercontent.com/9599/150437341-14554fe7-1c47-4462-a1d9-9b8d822aaea8.png">
6
7 That feature is enabled by a "Show version warning" check box in their Advanced Settings preference pane.
9 It's implemented by [this JavaScript](https://github.com/readthedocs/readthedocs.org/blob/0852d7c10d725d954d3e9a93513171baa1116d9f/readthedocs/core/static-src/core/js/doc-embed/version-compare.js#L13-L21) in their default theme, called [from here](https://github.com/readthedocs/readthedocs.org/blob/bc3e147770e5740314a8e8c33fec5d111c850498/readthedocs/core/static-src/core/js/doc-embed/footer.js#L66-L86).
10
11 I had an extra requirement: I wanted pages on my `/en/latest/` documentation (which shows documentation for the in-development `main` branch on GitHub) to link back to the `/en/stable/` equivalent - but only if that page also existed in the stable documentation.
12
13 I ended up [adding this snippet](https://github.com/simonw/datasette/commit/ffca55dfd7cc9b53522c2e5a2fa1ff67c9beadf2) of jQuery JavaScript to my custom `docs/_templates/layout.html` template:
18 <script>
19 jQuery(function ($) {
20 // Show banner linking to /stable/ if this is a /latest/ page
21 if (!/\/latest\//.test(location.pathname)) {
22 return;
23 }
24 var stableUrl = location.pathname.replace("/latest/", "/stable/");
25 // Check it's not a 404
26 fetch(stableUrl, { method: "HEAD" }).then((response) => {
47 {% endblock %}
48 ```
49 The neatest piece of this solution is the way it uses an HTTP `HEAD` request via `fetch()` to confirm that the equivalent stable page exists before adding a link to it:
50 ```javascript
51 var stableUrl = location.pathname.replace("/latest/", "/stable/");
52 // Check it's not a 404
53 fetch(stableUrl, { method: "HEAD" }).then((response) => {
56 ```
57
58 Here's what my fix looks like, running on https://docs.datasette.io/en/latest/csv_export.html
59
60 <img width="978" alt="This page has a banner that says: This documentation covers the development version of Datasette. See this page for the current stable release." src="https://user-images.githubusercontent.com/9599/150438021-0ab3db8f-7f65-4846-b2d4-880e10dce79d.png">
62 ## Alternative solution: sphinx-version-warning
63
64 Just minutes after I committed my fix I was informed of the existence of [sphinx-version-warning](https://sphinx-version-warning.readthedocs.io/en/latest/), a Sphinx plugin that can solve this problem too. There's an example of using that to add a message to the `/latest/` page in [its own documentation configuration here](https://github.com/humitos/sphinx-version-warning/blob/a82156c2ea08e5feab406514d0ccd9d48a345f48/docs/conf.py#L32-L38).
65
66 ```python
67 # -- Version Warning Banner configuration ------------------------------------
68 versionwarning_messages = {
69 'latest': 'This is a custom message only for version "latest" of this documentation.',
70 }
71 versionwarning_admonition_type = 'tip'
75 I decided to stick with my version, mainly because I like the `fetch()` solution I used.
76
77 GitHub issue: [ Documentation should clarify /stable/ vs /latest/ #1608](https://github.com/simonw/datasette/issues/1608)
til/readthedocs/documentation-seo-canonical.md
3 I was thinking about documentation SEO today. Like many projects, Datasette offers multiple versions of the documentation:
4
5 - https://docs.datasette.io/en/latest/ is the latest `main` branch on GitHub
6 - https://docs.datasette.io/en/stable/ is the most recent stable (non alpha or beta) release - currently 0.60
7 - https://docs.datasette.io/en/0.59.4/ is the documentation for that specific version - I have more than 70 of those now
23 <link rel="canonical" href="https://docs.datasette.io/en/stable/introspection.html" />
24 ```
25 Here's [their documentation](https://docs.readthedocs.io/en/latest/custom_domains.html#canonical-urls) covering this feature. I think you need to have configured a "default version" (though they may set a sensible default for that already) - for my project the page for doing that is the Advanced settings page at https://readthedocs.org/dashboard/datasette/advanced/
26
27 This TIL started life as [a Twitter thread](https://twitter.com/simonw/status/1484287724773203971).
til/python/too-many-open-files-psutil.md
1 # Using psutil to investigate "Too many open files"
2
3 I was getting this intermittent error running my Datasette test suite:
4
5 OSError: [Errno 24] Too many open files
7 To figure out what was going on, I used the `psutil` package and its `open_files()` method.
8
9 Here's the documentation for [psutil.Process.open_files](https://psutil.readthedocs.io/en/latest/#psutil.Process.open_files).
10
11 I ran `pip install psutil` in my virtual environment.
12
13 Then I ran `pytest --pdb` to drop into a Python debugger when a test failed.
14
15 In the debugger I ran this:
til/python/struct-endianness.md
7 ```python
8 def decode_matchinfo(buf):
9 # buf is a bytestring of unsigned integers, each 4 bytes long
10 return struct.unpack("I" * (len(buf) // 4), buf)
11 ```
44 ```python
45 def decode_matchinfo(buf):
46 # buf is a bytestring of unsigned integers, each 4 bytes long
47 return struct.unpack("<" + ("I" * (len(buf) // 4)), buf)
48 ```
til/python/stdlib-cli-tools.md
42 Then ran `rg`:
43 ```
44 rg 'if __name__ =' -l | grep -v 'test/' \
45 | grep -v 'tests/' | grep -v idlelib | grep -v turtledemo
46 ```
47 The `-l` option causes `ripgrep` to list matching files without showing the context of the match.
52 >
53 > ```bash
54 > rg 'if __name__ =' -l | grep -v -e 'test/' -e 'tests/' -e idlelib -e turtledemo
55 > ```
56 > `grep -v` still means "everything that doesn't match this" - but then the multiple `-e '...'` patterns are used to construct a "this pattern or this pattern or this pattern" filter. This saves on having to pipe through `grep -v` multiple times. Thanks for the tip, [dfc](https://news.ycombinator.com/item?id=36519308).
85 getopt.py
86 dbm/__init__.py
87 doctest.py
88 pickle.py
89 imaplib.py
290 ### nntplib
291
292 "nntplib built-in demo - display the latest articles in a newsgroup"
293
294 It defaults to `gmane.comp.python.general`:
til/python/setup-py-from-url.md
7 ```python
8 extras_require={
9 "test": ["pytest", "black", "hypothesis", "cogapp"],
10 "docs": ["furo", "sphinx-autobuild", "codespell", "sphinx-copybutton"],
11 "mypy": [
36 ```python
37 extras_require={
38 "test": ["pytest", "black", "hypothesis", "cogapp"],
39 "docs": [
40 "furo",
til/python/rye.md
1 # A few notes on Rye
2
3 [Rye](https://github.com/mitsuhiko/rye) is Armin Ronacher's new experimental Python packaging tool. I decided to take it for a test-run.
4
5 ## Installing Rye
til/python/quick-testing-pyenv.md
1 # Quickly testing code in a different Python version using pyenv
2
3 I had [a bug](https://github.com/simonw/llm/issues/82#issuecomment-1629735729) that was only showing up in CI against Python 3.8.
4
5 I used the following pattern with [pyenv](https://github.com/pyenv/pyenv) to quickly run the tests against that specific version.
6
7 (I had previously installed `pyenv` using `brew install pyenv`.)
58 Then installed my current project into that environment like so:
59 ```bash
60 /tmp/py38env/bin/pip install -e '.[test]'
61 ```
62 Now I can run the tests like this:
63 ```bash
64 /tmp/py38env/bin/pytest
65 ```
til/python/pypy-macos.md
44 sys.exit(cli())
45 ```
46 Then I tested an import against a large CSV file like so:
47 ```
48 (venv) /tmp % time sqlite-utils insert pypy.db t /tmp/en.openfoodfacts.org.products.csv --csv
til/python/pyproject.md
135 ## It finds the Python files for you
136
137 With `setup.py` I'm used to putting quite a bit of effort into telling Python which files should be included in the package - and making sure it doesn't include the `tests/` and `docs/` folder.
138
139 As far as I can tell, the default behaviour now is to find all `*.py` files and all `*/*.py` files and include those - but to exclude common patterns such as `tests/` and `docs/` and `tests.py` and `test_*.py`.
140
141 This behaviour is defined by `setuptools`. The [Automatic Discovery](https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#automatic-discovery) section of the `setuptools` documentation describes these rules in detail.
142
143 ## Adding metadata
203 ## Test dependencies
204
205 I like being able to run `pip install -e '.[test]'` to install test dependencies - things like `pytest`, which are needed to run the project tests but shouldn't be bundled with the project itself when it is installed.
206
207 Those can be added in a section like this:
209 ```toml
210 [project.optional-dependencies]
211 test = ["pytest"]
212 ```
213 I added that to my `/tmp/demo-package/pyproject.toml` file, then ran this in my elsewhere virtual environment:
214 ```bash
215 pip install -e '/tmp/demo-package[test]'
216 ```
217 The result was an installation of `pytest`, visible when I ran `pip freeze`.
218
219 ## Package data
til/python/pip-tools.md
14 psycopg2-binary
15 dj-database-url
16 pytest-django
17 django-extensions
18 django-htmx
21 httpx
22 sentry-sdk
23 pytest-httpx
24 ics==0.7
25 ```
28 > `django.utils.deprecation.RemovedInDjango41Warning: 'django_extensions' defines default_app_config = 'django_extensions.apps.DjangoExtensionsConfig'.`
29
30 I'm pinning `ics` to the exact version 0.7 due to a broken test I experienced with more recent versions.
31
32 ## Compiling requirements.txt from requirements.in
til/python/pdb-interact.md
3 Today [Carlton told me](https://twitter.com/carltongibson/status/1587155176590385159) about the [interact command](https://docs.python.org/3.10/library/pdb.html#pdbcommand-interact) in the Python debugger.
4
5 Here's how to use it with `pytest` (but it works anywhere else where you find yourself in a `pdb` session).
6
7 Use `pytest --pdb` to cause `pytest` to open a debugger at the first failed assertion (I added `assert False` to my test suite to demonstrate this).
8
9 Then type `interact` to drop into a full Python interactive prompt that keeps all of the local and global variables from the debugger:
10
11 ```
12 % pytest -k test_drop --pdb
13 ======== test session starts ========
14 platform darwin -- Python 3.10.3, pytest-7.1.3, pluggy-1.0.0
15 ...
16 > assert False
17 E assert False
18
19 tests/test_api_write.py:272: AssertionError
20 >>>> entering PDB >>>>
21
22 >>>> PDB post_mortem (IO-capturing turned off) >>>>
23 > /Users/simon/Dropbox/Development/datasette/tests/test_api_write.py(272)test_drop_table()
24 -> assert False
25 (Pdb) interact
26 >>> locals().keys()
27 dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', '__file__', '__cached__', '__builtins__',
28 '@py_builtins', '@pytest_ar', 'Datasette', 'sqlite3', 'pytest', 'time', 'ds_write', 'write_token', 'test_write_row',
29 'test_write_rows', 'test_write_row_errors', 'test_delete_row', 'test_drop_table', 'scenario', 'token', 'should_work',
30 'path', 'response', '@py_assert0', '@py_format2'])
31 ```
til/python/packaging-pyinstaller.md
67 I solved this by adding each `ModuleNotFoundError` module to `--hidden-import` until it worked.
68
69 I've tested this script (and the generated executables) on both macOS and Ubuntu Linux so far, and it's worked perfectly in both cases. See [issue 93](https://github.com/simonw/datasette/issues/93) for more details.
til/python/os-remove-windows.md
1 # os.remove() on Windows fails if the file is already open
2
3 I puzzled over this one for [quite a while](https://github.com/simonw/sqlite-utils/issues/503) this morning. I had this test that was failing with Windows on Python 3.11:
4
5 ```python
6 @pytest.mark.parametrize(
7 "use_path,file_exists", [(True, True), (True, False), (False, True), (False, False)]
8 )
9 def test_recreate(tmpdir, use_path, file_exists):
10 filepath = str(tmpdir / "data.db")
11 if use_path:
17 assert ["t2"] == Database(filepath).table_names()
18 ```
19 The test checks that the `recreate=True` option to my `Database()` constructor deletes and re-creates the file.
20
21 Here's [the implementation](https://github.com/simonw/sqlite-utils/blob/9cbe19ac0547031f3b626d9d18ef05c0d193bf79/sqlite_utils/db.py#L320-L323) of that `recreate=True` option:
28 On Windows I was getting the following exception:
29 ```
30 FAILED tests/test_recreate.py::test_recreate[True-True] -
31 PermissionError: [WinError 32] The process cannot access the file because it is being used by another process:
32 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_recreate_True_True_0\\data.db'
33 ```
34 Eventually I spotted the problem: my call on this line was opening a SQLite connection to the `data.db` file:
39 But it wasn't explicitly closing the SQLite connection. It turns out that leaves the database file open - and since the file is still open Windows raised an exception when `os.remove()` was called against it.
40
41 I fixed the error by closing the SQLite3 connection in my test, like this:
42
43 ```python
til/python/locust.md
1 # Simple load testing with Locust
2
3 I've been using [Locust](https://locust.io/) recently to run some load tests - most significantly [these tests](https://github.com/simonw/django_sqlite_benchmark/issues?q=is%3Aissue+is%3Aclosed) against SQLite running with Django and [this test](https://github.com/simonw/datasette-gunicorn/issues/1) exercising Datasette and Gunicorn.
4
5 ## A really basic test
6
7 Locust tests are defined in a `locustfile.py` file. Here's the most basic possible test, which sends requests to the `/` page of a web application:
8
9 ```python
22 locust
23
24 This opens a web server on http://0.0.0.0:8089/ (by default) which offers an interface for starting a new test:
25
26 <img width="484" alt="A form with fields for number of users (peak concurrency), spawn rate (users started/second) and host (the URL to start the tests at)" src="https://user-images.githubusercontent.com/9599/197367313-8b4c85b9-cd0e-45ed-adc2-789116687cb5.png">
27
28 You can run this for as long as you like, and it will produce both statistics on the load test and some pleasing charts:
29
30 <img width="1049" alt="Charts for total requests per second over time and response times in ms" src="https://user-images.githubusercontent.com/9599/197367351-03735a41-00cb-4bbf-a13d-cb22bbaf6e3b.png">
32 ## Using the command-line
33
34 You can also run tests without the web server at all. I tend to use this option as it's quicker to repeat a test, and you can easily copy and paste the results into a GitHub issue thread.
35
36 locust --headless --users 4 --spawn-rate 2 -H http://127.0.0.1:8001
37
38 This runs the tests in the current `locustfile.py` against `http://127.0.0.1:8001`, with four concurrent users and ramping up at 2 users every second (so taking two seconds to ramp up to full concurrency).
39
40 Hit `Ctrl+C` to end the test. It will end up producing something like this:
41
42 ```
55 ```
56
57 ## More complicated tests
58
59 Locust tests can get a lot more complex than this. The documentation [provides this example](https://docs.locust.io/en/stable/writing-a-locustfile.html):
60
61 ```python
til/python/introspect-function-parameters.md
18 ```
19
20 And here's an illustrative unit test:
21
22 ```python
23 def test_call_with_supported_arguments():
24 def foo(a, b):
25 return "{}+{}".format(a, b)
28 assert "1+2" == utils.call_with_supported_arguments(foo, a=1, b=2, c=3)
29
30 with pytest.raises(TypeError):
31 utils.call_with_supported_arguments(foo, a=1)
32 ```
til/python/installing-upgrading-plugins-with-pipx.md
56 ]
57
58 I added all of this to the Datasette docs here: https://docs.datasette.io/en/latest/installation.html#using-pipx
59 (see https://github.com/simonw/datasette/issues/756).
til/python/generate-nested-json-summary.md
1 # Generated a summary of nested JSON data
2
3 I was trying to figure out the shape of the JSON object from https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true - which is 3.2MB and heavily nested, so it's difficult to get a good feel for the shape.
4
5 I solved this with a Python `summarize()` function which recursively truncates the nested lists and dictionaries.
28 import json, requests
29 data = requests.get(
30 "https://github.com/simonw/coronavirus-data-gov-archive/blob/master/data_latest.json?raw=true"
31 ).json()
32 print(json.dumps(summarize(data, list_limit=2, key_limit=7), indent=4))
til/python/cog-to-update-help-in-readme.md
11 import cog
12 from csvs_to_sqlite import cli
13 from click.testing import CliRunner
14 runner = CliRunner()
15 result = runner.invoke(cli.cli, ["--help"])
37 ## Testing with cog --check
38
39 A version of Cog released after I first wrote this TIL added a new `--check` option, so you can run a test in CI to check if the file needs to be updated using:
40
41 cog --check README.md
42
43 ## Writing a test (before cog --check)
44
45 Any time I generate content like this in a repo I like to include a test that will fail if I forget to update the content.
46
47 `cog` clearly isn't designed to be used as an independent library, but I came up with the following pattern `pytest` test which works well, in my `tests/test_csvs_to_sqlite.py` module:
48
49 ```python
54
55
56 def test_if_cog_needs_to_be_run():
57 _stdout = sys.stdout
58 sys.stdout = StringIO()
77 Cog then writes the generated output to `stdout` - which I capture with that `sys.stdout` trick.
78
79 Finally, I compare the generated output to the current file content and fail the test with a reminder to run `cog -r` if they do not match.
80
81 ## Cog for reStructuredText
til/python/codespell.md
til/python/callable.md
33 `check_callable(obj)` returns a `CallableStatus` named tuple, with an `is_callable` boolean saying if it can be caled and an `is_async_callable` boolean specifying if you need to use `await` with it.
34
35 I wrote these `pytest` tests to exercise the `check_callable()` function:
36
37 ```python
38 import pytest
39
40
61
62
63 @pytest.mark.parametrize(
64 "obj,expected_is_callable,expected_is_async_callable",
65 (
77 ),
78 )
79 def test_check_callable(obj, expected_is_callable, expected_is_async_callable):
80 status = check_callable(obj)
81 assert status.is_callable == expected_is_callable
til/python/call-pip-programatically.md
3 I needed this for the `datasette install` and `datasette uninstall` commands, see [issue #925](https://github.com/simonw/datasette/issues/925).
4
5 My initial attempt at this resulted in weird testing errors ([#928](https://github.com/simonw/datasette/issues/928)) - while investigating them I stumbled across [this comment](https://github.com/pypa/pip/blob/e060970d51c5946beac8447eb95585d83019582d/src/pip/_internal/cli/main.py#L23-L47) in the `pip` source code:
6
7 ```
41 run_module("pip", run_name="__main__")
42 ```
43 And here's how I wrote [a unit test](https://github.com/simonw/datasette/blob/afdeda8216d4d3027f87583ccdbef17ad85022ef/tests/test_cli.py#L114-L124) for it:
44 ```python
45 @mock.patch("datasette.cli.run_module")
46 def test_install(run_module):
47 runner = CliRunner()
48 runner.invoke(cli, ["install", "datasette-mock-plugin", "datasette-mock-plugin2"])
til/pytest/test-click-app-with-streaming-input.md
1 # Testing a Click app with streaming input
2
3 For [sqlite-utils#364](https://github.com/simonw/sqlite-utils/issues/364) I needed to write a test for a [Click](https://click.palletsprojects.com/) app which dealt with input streamed to standard input. I needed to run some assertions during that process, which ruled out the usual [CliRunner.invoke()](https://click.palletsprojects.com/en/8.0.x/testing/) testing tool since that works by running the command until completion.
4
5 I decided to use `subprocess` to run the application. Here's the pattern I came up with for the test:
6 ```python
7 def test_insert_streaming_batch_size_1(db_path):
8 # https://github.com/simonw/sqlite-utils/issues/364
9 # Streaming with --batch-size 1 should commit on each record
46 I realized I needed to call `proc.stdin.flush()` after each write to ensure the write was pushed to the process in a predictable manner.
47
48 At the end of the test, running `proc.stdin.close()` is equivalent to sending an end-of-file, then `proc.wait()` ensures the process has finished and terminated.
til/pytest/treat-warnings-as-errors.md
1 # Treating warnings as errors in pytest
2
3 I was seeing this warning in a Django project when I thought I was correctly using timezone-aware dates everywhere:
5 > RuntimeWarning: DateTimeField Shift.shift_start received a naive datetime (2022-04-01 00:00:00) while time zone support is active
6
7 Running `pytest -Werror` turns those warnings into errors that fail the tests.
8
9 Which means you can investigate them in the Python debugger by running:
10
11 pytest -Werror --pdb -x
12
13 The `--pdb` starts the debugger at the warning (now error) and the `-x` stops the tests after the first failure.
14
15 ## In pytest.ini
16
17 You can also set this in `pytest.ini` - useful if you want ALL warnings to be failures in both development and CI.
18
19 Add the following to the `pytest.ini` file:
20
21 ```ini
22 [pytest]
23 # ...
24 filterwarnings =
31
32 ```ini
33 [pytest]
34 # ...
35 filterwarnings =
til/pytest/syrupy.md
1 # Snapshot testing with Syrupy
2
3 I'm a big fan of snapshot testing - writing tests where you compare the output of some function to a previously saved version, and can re-generate that version from scratch any time something changes.
4
5 I usually do this by hand - I run `pytest -x --pdb` to stop at the first failing test and drop into a debugger, then copy out the representation of the generated value and copy it into the test. I wrote about how I use this pattern a few years ago in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/).
6
7 Today I learned how to do the same thing with the [Syrupy](https://github.com/tophat/syrupy) plugin for [pytest](https://docs.pytest.org/). I think I'll be using this for many of my future projects.
8
9 ## Some initial tests
10
11 I created a `tests/test_stuff.py` file with the following contents:
12
13 ```python
14 def test_one(snapshot):
15 assert "hello" == snapshot
16
17
18 def test_two(snapshot):
19 assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
20 ```
21 Then I installed both `pytest` and `syrupy`:
22
23 ```bash
24 pip install pytest syrupy
25 ```
26 Now in my parent folder I can run this:
27 ```bash
28 pytest
29 ```
30 And the tests fail:
31 ```
32 tests/test_stuff.py FF [100%]
33
34 ======================================== FAILURES =========================================
35 ________________________________________ test_one _________________________________________
36
37 snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
38
39 def test_one(snapshot):
40 > assert "hello" == snapshot
41 E AssertionError: assert [+ received] == [- snapshot]
42 E Snapshot 'test_one' does not exist!
43 E + 'hello'
44
45 tests/test_stuff.py:2: AssertionError
46 ________________________________________ test_two _________________________________________
47
48 snapshot = SnapshotAssertion(name='snapshot', num_executions=1)
49
50 def test_two(snapshot):
51 > assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
52 E AssertionError: assert [- snapshot] == [+ received]
53 E Snapshot 'test_two' does not exist!
54 E + dict({
55 E + 'bar':
57 E ...Full output truncated (9 lines hidden), use '-vv' to show
58
59 tests/test_stuff.py:5: AssertionError
60 --------------------------------- snapshot report summary ---------------------------------
61 2 snapshots failed.
62 ================================= short test summary info =================================
63 FAILED tests/test_stuff.py::test_one - AssertionError: assert [+ received] == [- snapshot]
64 FAILED tests/test_stuff.py::test_two - AssertionError: assert [- snapshot] == [+ received]
65 ==================================== 2 failed in 0.05s ====================================
66 ```
67 The snapshots don't exist yet. But I can create them automatically by running this:
68 ```bash
69 pytest --snapshot-update
70 ```
71 Which outputs passing tests along with:
72 ```
73 --------------------------------- snapshot report summary ---------------------------------
75 ==================================== 2 passed in 0.01s ====================================
76 ```
77 And sure enough, there's now a new folder called `tests/__snapshots__` with a file called `test_stuff.ambr` which contains this:
78 ```
79 # serializer version: 1
80 # name: test_one
81 'hello'
82 # ---
83 # name: test_two
84 dict({
85 'bar': dict({
94 # ---
95 ```
96 Running `pytest` again passes, because the snapshots exist and continue to match the test output.
97
98 The serialized snapshot format is designed to be checked into Git. It's pleasantly readable - I can review that and see what it's testing, and I could even update it by hand - though I'll much more likely use the `--snapshot-update` flag and then eyeball the differences.
99
100 ## Adding a dataclass
112
113
114 def test_one(snapshot):
115 assert "hello" == snapshot
116
117
118 def test_two(snapshot):
119 assert snapshot == {"foo": [1, 2, 3], "bar": {"baz": "qux"}}
120
121
122 def test_three(snapshot):
123 assert Foo(1, "hello") == snapshot
124 ```
125 Running `pytest` again failed. `pytest --snapshot-update` passed and updated my snapshot file, adding this to it:
126 ```
127 # name: test_three
128 Foo(bar=1, baz='hello')
129 ```
138 self.bar = bar
139
140 def test_four(snapshot):
141 assert WeirdClass(1, 2) == snapshot
142 ```
143 Serialized to:
144 ```
145 # name: test_four
146 WeirdClass(
147 bar=2,
til/pytest/show-files-opened-by-tests.md
1 # Show files opened by pytest tests
2
3 My test suite for [Datasette](https://github.com/simonw/datasette) has grown so large that running the whole thing sometimes causes me to run out of file handles.
4
5 I've not solved this yet, but I did figure out a pattern to get `pytest` to show me which new files were opened by which tests.
6
7 Add the following to `conftest.py`:
8
9 ```
11
12
13 @pytest.fixture(autouse=True)
14 def check_for_new_file_handles(request):
15 proc = psutil.Process()
22 ```
23
24 This uses [psutil](https://pypi.org/project/psutil/) (`pip install psutil`) to build a set of the open files before and after the test runs. It then uses a list comprehension to figure out which file handles are new.
25
26 Using `@pytest.fixture(autouse=True)` means it will automatically be used for every test.
27
28 It's a `yield` fixture, which means the part of the code before the `yield` statement runs before the test, then the part afterwards runs after the test function has finished.
29
30 Accepting the `request` argument means it gets access to a `pytest` request object, which includes `request.node` which is an object representing the test that is being executed.
31
32 You need to run `pytest -s` to see the output (without the `-s` the output is hidden).
33
34 Example output:
35
36 `tests/test_permissions.py .[popenfile(path='/private/var/folders/wr/hn3206rs1yzgq3r49bz8nvnh0000gn/T/tmp76w2ukin/fixtures.db', fd=8)] opened by <Function test_view_padlock[/-None-200-200]>..`
til/pytest/subprocess-server.md
1 # Start a server in a subprocess during a pytest session
2
3 I wanted to start an actual server process, run it for the duration of my pytest session and shut it down at the end.
4
5 Here's the recipe I came up with. This fixture lives in `conftest.py`:
6
7 ```python
8 import pytest
9 import sqlite_utils
10 import subprocess
11
12 @pytest.fixture(scope="session")
13 def ds_server(tmp_path_factory):
14 db_directory = tmp_path_factory.mktemp("dbs")
15 db_path = db_directory / "test.db"
16 db = sqlite_utils.Database(db_path)
17 insert_test_data(db)
18 ds_proc = subprocess.Popen(
19 [
31 assert not ds_proc.poll(), ds_proc.stdout.read().decode("utf-8")
32 yield ds_proc
33 # Shut it down at the end of the pytest session
34 ds_proc.terminate()
35 ```
36 A test looks like this:
37 ```python
38 import httpx
39
40 def test_server_starts(ds_server):
41 response = httpx.get("http://127.0.0.1:8041/")
42 assert response.status_code == 200
45 ## Alternative recipe for serving static files
46
47 While [adding tests to Datasette Lite](https://github.com/simonw/datasette-lite/issues/35) I found myself needing to run a localhost server that served static files directly.
48
49 I completely forgot about this TIL, and instead took inspiration [from pytest-simplehttpserver](https://github.com/ppmdo/pytest-simplehttpserver/blob/a82ad31912121c074ff1a76c4628a1c42c32b41b/src/pytest_simplehttpserver/pytest_plugin.py#L17-L28) - coming up with this pattern:
50
51 ```python
52 from subprocess import Popen, PIPE
53 import pathlib
54 import pytest
55 import time
56 from http.client import HTTPConnection
59
60
61 @pytest.fixture(scope="module")
62 def static_server():
63 process = Popen(
85 Again, including `static_server` as a fixture is enough to ensure requests to `http://localhost:8123/` will be served by that temporary server.
86
87 I like how this version polls for a successful HEAD request (a trick inspired by `pytest-simplehttpserver`) rather than just sleeping.
til/pytest/session-scoped-tmp.md
1 # Session-scoped temporary directories in pytest
2
3 I habitually use the `tmpdir` fixture in pytest to get a temporary directory that will be cleaned up after each test, but that doesn't work with `scope="session"` - which can be used to ensure an expensive fixture is run only once per test session and the generated content is used for multiple tests.
4
5 To get a temporary directory that works with `scope="session"`, use the `tmp_path_factory` built-in pytest fixture like this:
6
7 ```python
8 import pytest
9
10
11 @pytest.fixture(scope="session")
12 def template_dir(tmp_path_factory):
13 template_dir = tmp_path_factory.mktemp("page-templates")
19
20
21 def test_about(template_dir):
22 assert "ABOUT!" == (template_dir / "pages" / "about.html").read_text()
23
24
25 def test_request(template_dir):
26 assert "request" == (template_dir / "pages" / "request.html").read_text()
27 ```
28
29 Example: https://github.com/simonw/datasette/blob/1b7b66c465e44025ec73421bd69752e42f108321/tests/test_custom_pages.py#L16-L45
til/pytest/pytest-subprocess.md
1 # Mocking subprocess with pytest-subprocess
2
3 For [apple-notes-to-sqlite](https://github.com/dogsheep/apple-notes-to-sqlite) I needed to write some tests that simulated executing the `osascript` command using the Python `subprocess` module.
4
5 I wanted my tests to run on Linux CI machines, where that command would not exist.
6
7 After failing to use `unittest.mock.patch` to solve this, I went looking for alternatives. I found [pytest-subprocess](https://pypi.org/project/pytest-subprocess/).
8
9 Here's the relevant section of [the test I wrote](https://github.com/dogsheep/apple-notes-to-sqlite/blob/0.1/tests/test_apple_notes_to_sqlite.py):
10
11 ```python
17 """
18
19 def test_apple_notes_to_sqlite_dump(fp):
20 fp.register_subprocess(["osascript", "-e", COUNT_SCRIPT], stdout=b"2")
21 fp.register_subprocess(["osascript", "-e", fp.any()], stdout=FAKE_OUTPUT)
25 # ...
26 ```
27 `fp` is the fixture provided by the package (you need to `pip install pytest-subprocess` for this to work).
28
29 `COUNT_SCRIPT` here is the first of my `osascript` constants. It looks like this (in `cli.py`):
51 I eventually figured that using `fp.any()` was easier than specifying the exact script. This is a wildcard value which matches any string. It returns the full `FAKE_OUTPUT` variable as the simulated standard out.
52
53 What's useful about `pytest-subprocess` is that it works for both `subprocess.check_output()` and more complex `subprocess.Popen()` calls - both of which I was using in this script.
til/pytest/registering-plugins-in-tests.md
1 # Registering temporary pluggy plugins inside tests
2
3 While implementing more finely-grained permissions for `datasette-insert-api` ([issue 8](https://github.com/simonw/datasette-insert-api/issues/8)) I decided I wanted to register a Datasette pluggy plugin for the duration of a single test.
4
5 Here's the pattern I figured out for doing that:
8 from datasette import hookimpl
9 from datasette.plugins import pm
10 import pytest
11
12
13 def test_using_test_plugin():
14 class TestPlugin:
15 __name__ = "TestPlugin"
22 pm.register(TestPlugin(), name="undo")
23 try:
24 # Rest of test goes here
25 finally:
26 pm.unregister(name="undo")
27 ```
28
29 Here's [an example](https://github.com/simonw/datasette-insert/blob/7f4c2b3954190d547619d043bbe714481b10ac1e/tests/test_insert_api.py) of a test that uses a pytest fixture to register (and de-register) a plugin:
30
31 ```python
33 from datasette.app import Datasette
34 from datasette.plugins import pm
35 import pytest
36
37
38 @pytest.fixture
39 def unsafe():
40 class UnsafeInsertAll:
51
52
53 @pytest.mark.asyncio
54 async def test_insert_alter(ds, unsafe):
55 async with httpx.AsyncClient(app=ds.app()) as client:
56 response = await client.post(
til/pytest/pytest-recording-vcr.md
1 # Using VCR and pytest with pytest-recording
2
3 [pytest-recording](https://github.com/kiwicom/pytest-recording) is a neat pytest plugin that makes it easy to use the [VCR library](https://vcrpy.readthedocs.io/), which helps write tests against HTTP resources by automatically capturing responses and baking them into a YAML file to be replayed during the tests.
4
5 It even works with [boto3](https://aws.amazon.com/sdk-for-python/)!
6
7 To use it, first install it with `pip install pytest-recording` and then add the `@pytest.mark.vcr` decorator to a test that makes HTTP calls:
8
9 ```python
10 @pytest.mark.vcr
11 def test_create():
12 runner = CliRunner()
13 with runner.isolated_filesystem():
14 result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
15 assert result.exit_code == 0
16 ```
17
18 The first time you run the tests, use the `--record-mode=once` option:
19
20 pytest -k test_create --record-mode=once
21
22 This defaults to creating a YAML file in `tests/cassettes/test_s3_credentials/test_create.yaml`.
23
24 Subsequent runs of `pytest -k test_create` will reuse those recorded HTTP requests and will not make any network requests - I confirmed this by turning off my laptop's WiFi.
til/pytest/pytest-code-coverage.md
1 # Code coverage using pytest and codecov.io
2
3 I got my [asgi-csrf](https://github.com/simonw/asgi-csrf) Python package up to 100% code coverage. Here's [the pull request](https://github.com/simonw/asgi-csrf/issues/13).
4
5 I started by installing and using the [pytest-cov](https://pypi.org/project/pytest-cov/) pytest plugin.
6
7 ```
8 pip install pytest-cov
9 pytest --cov=asgi_csrf
10 ```
11 This shows the current code coverage percentage for the `asgi_csrf` module in the terminal output:
13 collected 18 items
14
15 test_asgi_csrf.py .................. [100%]
16
17 ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
23 ========= 18 passed in 0.37s =========
24 ```
25 To generate an HTML report showing which lines are not covered by tests:
26 ```
27 pytest --cov=asgi_csrf --cov-report=html
28 open htmlcov/index.html
29 ```
30 Here's a hosted copy of that report: https://asgi-csrf-htmlcov-ewca4t9se.vercel.app/asgi_csrf_py.html
31
32 ## Failing the tests if coverage is below a certain threshold
33
34 The `--cov-fail-under=100` option does this:
35
36 ```
37 pytest --cov-fail-under=100 --cov asgi_csrf
38 ======= test session starts =======
39 platform darwin -- Python 3.7.3, pytest-6.0.1, py-1.9.0, pluggy-0.13.1
40 rootdir: /Users/simon/Dropbox/Development/asgi-csrf
41 plugins: cov-2.10.1, asyncio-0.14.0
42 collected 18 items
43
44 test_asgi_csrf.py .................. [100%]
45
46 ---------- coverage: platform darwin, python 3.7.3-final-0 -----------
49 asgi_csrf.py 169 13 92%
50
51 FAIL Required test coverage of 100% not reached. Total coverage: 92.31%
52 ```
53 I added this to my [GitHub test action](https://github.com/simonw/asgi-csrf/blob/83d2b4f6bb034b746fd3f20f57ebdbaeae007a73/.github/workflows/test.yml#L27-L29):
54 ```yaml
55 - name: Run tests
56 run: |
57 pytest --cov-fail-under=100 --cov asgi_csrf
58 ```
59 ## Pushing results to codecov.io
61 https://codecov.io/ offers free coverage reporting for open source projects. I authorized it against my GitHub account, then enabled it for the `asgi-csrf` project by navigating to https://codecov.io/gh/simonw/asgi-csrf (hacking the URL saves you from having to paginate through all of your repos looking for the right one).
62
63 codecov.io gives you a token - set that as a GitHub repository secret as `CODECOV_TOKEN` - then add the following to the test action configuration:
64 ```yaml
65 - name: Upload coverage to codecov.io
69 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
70 ```
71 This will upload your coverage report (no matter if the previous test step failed or succeeded). codecove.io then reports back to pull requests and maintains a dashboard for your project.
72
73 codecov.io doesn't detect if you use a `main` or `master` branch so I had to switch the default branch in the settings at https://codecov.io/gh/simonw/asgi-csrf/settings
til/pytest/pytest-argparse.md
1 # Writing pytest tests against tools written with argparse
2
3 I usually build command-line tools using [Click](https://click.palletsprojects.com/) (and my [click-app](https://github.com/simonw/click-app) cookiecutter template), which includes a really nice [set of tools](https://click.palletsprojects.com/en/8.0.x/testing/) for writing tests.
4
5 Today I decided to try building a tool called [stream-delay](https://github.com/simonw/stream-delay) using [argparse]() from the Python standard library, since it didn't need any other dependencies.
6
7 The one challenge I had was how to write the tests. I used [pytest](https://pytest.org/) as a test-only dependency.
8
9 Here's the pattern I came up with, using the [capsys pytest fixture](https://docs.pytest.org/en/6.2.x/capture.html) to capture standard output from my tool.
10
11 ```python
12 from stream_delay import main
13 import pytest
14
15 @pytest.mark.parametrize("option", ("-h", "--help"))
16 def test_help(capsys, option):
17 try:
18 main([option])
38 # ...
39 ```
40 As you can see, `main()` takes an optional list of arguments. The default for that is `None` which will cause `argparse` to read `sys.argv` - but I can inject arguments to the function from my tests if I need to.
41
42 I'm catching the `SystemExit` exception because this will be raised by default if you use `-h` or `--help` - but I still want to finish my test execution so I can inspect the captured output.
43
44 Complete code:
45
46 - [stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/stream_delay.py)
47 - [tests/test_stream_delay.py](https://github.com/simonw/stream-delay/blob/0.1/tests/test_stream_delay.py)
til/pytest/pytest-mock-calls.md
1 # Quick and dirty mock testing with mock_calls
2
3 I needed to write a test that checked for a really complex sequence of mock calls for [s3-credentials#3](https://github.com/simonw/s3-credentials/issues/3).
4
5 I ended up using the following trick, using [pytest-mock](https://pypi.org/project/pytest-mock/):
6
7 ```python
8 def test_create(mocker):
9 boto3 = mocker.patch("boto3.client")
10 runner = CliRunner()
11 with runner.isolated_filesystem():
12 result = runner.invoke(cli, ["create", "pytest-bucket-simonw-1", "-c"])
13 assert [str(c) for c in boto3.mock_calls] == [
14 "call('s3')",
15 "call('iam')",
16 "call().head_bucket(Bucket='pytest-bucket-simonw-1')",
17 "call().get_user(UserName='s3.read-write.pytest-bucket-simonw-1')",
18 'call().put_user_policy(PolicyDocument=\'{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}\', PolicyName=\'s3.read-write.pytest-bucket-simonw-1\', UserName=\'s3.read-write.pytest-bucket-simonw-1\')',
19 "call().create_access_key(UserName='s3.read-write.pytest-bucket-simonw-1')",
20 "call().create_access_key().__getitem__('AccessKey')",
21 "call().create_access_key().__getitem__().__str__()",
22 ]
23 ```
24 I used the trick I describe in [How to cheat at unit tests with pytest and Black](https://simonwillison.net/2020/Feb/11/cheating-at-unit-tests-pytest-black/) where I run that comparison against an empty `[]` list, then use `pytest --pdb` to drop into a debugger and copy and paste the output of `[str(c) for c in boto3.mock_calls]` into my test code.
25
26 Initially I used a comparison directly against `boto3.mock_calls` - but this threw a surprising error. The calls sequence I baked into my tests looked like this:
27
28 ```python
29 from unittest.mock import call
30
31 # ...
34 call("s3"),
35 call("iam"),
36 call().head_bucket(Bucket="pytest-bucket-simonw-1"),
37 call().get_user(UserName="s3.read-write.pytest-bucket-simonw-1"),
38 call().put_user_policy(
39 PolicyDocument='{"Version": "2012-10-17", "Statement": [{"Sid": "ListObjectsInBucket", "Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1"]}, {"Sid": "AllObjectActions", "Effect": "Allow", "Action": "s3:*Object", "Resource": ["arn:aws:s3:::pytest-bucket-simonw-1/*"]}]}',
40 PolicyName="s3.read-write.pytest-bucket-simonw-1",
41 UserName="s3.read-write.pytest-bucket-simonw-1",
42 ),
43 call().create_access_key(UserName="s3.read-write.pytest-bucket-simonw-1"),
44 call().create_access_key().__getitem__("AccessKey"),
45 call().create_access_key().__getitem__().__str__(),
46 ]
47 ```
48 But when I ran `pytest` that last one failed:
49 ```
50 E - 'call().create_access_key().__getitem__()',
53 E ? ^^^^^^^^^^
54 ```
55 It turns out `__str__()` calls do not play well with the `call()` constructor - see [this StackOverflow question](https://stackoverflow.com/questions/61926147/how-to-represent-unittest-mock-call-str).
56
57 My solution was to cast them all to `str()` using a list comprehension, which ended up fixing that problem.
59 ## Gotcha: parameter ordering
60
61 There's one major flaw to the `str()` trick I'm using here: the order in which parameters are displayed in the string representation of `call()` may differ between Python versions. I had to undo this trick in one place I was using it ([see here](https://github.com/simonw/s3-credentials/issues/8)) as a result due to the following test failure:
62
63 ```
til/pytest/playwright-pytest.md
1 # Using pytest and Playwright to test a JavaScript web application
2
3 I [decided to add](https://github.com/simonw/datasette-lite/issues/35) automated tests to my [Datasette Lite](https://simonwillison.net/2022/May/4/datasette-lite/) project. Datasette Lite bundles my Datasette Python web application as a client-side application running inside WebAssembly using Pyodide.
4
5 I wrote the tests using [playwright-pytest](https://github.com/microsoft/playwright-pytest), which lets you write tests in Python using Microsoft's [Playwright](https://playwright.dev/) browser automation library.
6
7 ## Installing playwright-pytest
8
9 Two steps:
10
11 pip install playwright-pytest
12
13 Then a second step to install the browsers used by Playwright itself:
15 playwright install
16
17 I had those browsers installed already, but I still needed to run that command since the updated version of `playwright-pytest` needed more recent versions.
18
19 (I had limited internet while doing this, and discovered that you can trick Playwright into using an older browser version by renaming a folder in `~/Library/Caches/ms-playwright` to the one that shows up in the error message that says that the browsers cannot be found.)
20
21 ## An initial test
22
23 The first test I wrote looked like this, saved in `test_app.py`:
24
25 ```python
26 from playwright.sync_api import Page, expect
27
28 def test_initial_load(page: Page):
29 page.goto("https://lite.datasette.io/")
30 loading = page.locator("#loading-indicator")
38 ```
39
40 Then run the test by running this in the same directory as that file:
41
42 pytest
43
44 `playwright-pytest` provides the `page` fixture - annotating it with `: Page` is optional but if you do that then VS Code knows what it is and can provide autocomplete in the editor.
45
46 `page.goto()` causes the browser to navigate to that URL.
53 expect(loading).to_have_css("display", "block")
54 ```
55 The `expect()` helper function encapsulates the concept of polling the page to wait for something to become true within a time limit. This is the key technique for avoiding "flaky" tests when working with Playwright.
56
57
58 The assertions [are listed here](https://playwright.dev/python/docs/test-assertions).
59
60 You don't actually need to use `expect()` though - that's useful if you don't know how long it will take for the page to load, but if you know the page is already loaded you can write assertions like this instead:
67 ```
68
69 ## pytest options
70
71 The `playwright-pytest` package adds a bunch of new options to `pytest`. The most useful is `--headed`:
72
73 pytest --headed
74
75 This runs the tests in "headed" mode - which means a visible browser window pops up during the tests so you can see what is happening.
76
77 `--browser firefox` runs them using Firefox instead of Chromium.
78
79 `--tracing on` is really interesting: it generates a trace ZIP file which you can then open using https://trace.playwright.dev/ to explore a detailed trace of the test as it executed.
80
81 `--video on` records a video (as a `.webm` file) of the test. I've not tried it yet, but `--video retain-on-failure` only keeps that video if the test fails.
82
83 Here's [documentation on all of the options](https://playwright.dev/python/docs/test-runners).
84
85 ## Running a localhost static server during the tests
86
87 I wanted to run the tests against the most recent version of my code, which consists of an `index.html` file and a `webworker.js` file. Because these use web workers they need to be run from an actual localhost web server, so I needed to start one at the beginning of the tests and shut it down at the end.
88
89 I wrote about my solution for this in another TIL: [Start a server in a subprocess during a pytest session](https://til.simonwillison.net/pytest/subprocess-server).
90
91 ## My test suite so far
92
93 Here's where [I've got to so far](https://github.com/simonw/datasette-lite/blob/daba69708c7a72adad20dce3c534b9a399ef11c8/tests/test_datasette_lite.py):
94
95 ```python
97 from subprocess import Popen, PIPE
98 import pathlib
99 import pytest
100 import time
101 from http.client import HTTPConnection
104
105
106 @pytest.fixture(scope="module")
107 def static_server():
108 process = Popen(
129
130
131 @pytest.fixture(scope="module")
132 def dslite(static_server, browser: Browser) -> Page:
133 page = browser.new_page()
140
141
142 def test_initial_load(dslite: Page):
143 expect(dslite.locator("#loading-indicator")).to_have_css("display", "none")
144
145
146 def test_has_two_databases(dslite: Page):
147 assert [el.inner_text() for el in dslite.query_selector_all("h2")] == [
148 "fixtures",
151
152
153 def test_navigate_to_database(dslite: Page):
154 h2 = dslite.query_selector("h2")
155 assert h2.inner_text() == "fixtures"
174 ## Running it in GitHub Actions
175
176 Here's the [GitHub Actions workflow](https://github.com/simonw/datasette-lite/blob/main/.github/workflows/test.yml) I'm using to run the tests:
177
178 ```yaml
187
188 jobs:
189 test:
190 runs-on: ubuntu-latest
191 steps:
192 - uses: actions/checkout@v3
206 pip install -r dev-requirements.txt
207 playwright install
208 - name: Run test
209 run: |
210 pytest
211 ```
212 [dev-requirements.txt](https://raw.githubusercontent.com/simonw/datasette-lite/main/dev-requirements.txt) contains this:
213 ```
214 pytest-playwright==0.3.0
215 playwright==1.24.0
216 ```
til/pytest/only-run-integration.md
1 # Opt-in integration tests with pytest --integration
2
3 For both [s3-credentials](https://github.com/simonw/s3-credentials) and [datasette-publish-fly](https://github.com/simonw/datasette-publish-fly) I have a need for real-world integration tests that actually interact with the underlying APIs (AWS or Fly) to create and destroy resources on those platforms.
4
5 Most of the time I want my tests to run without doing these. I want the option to run `pytest --integration` to opt-in to running those extra integration tests.
6
7 Here's the pattern I'm using. First, in `tests/conftest.py`:
8
9 ```python
10 import pytest
11
12
13 def pytest_addoption(parser):
14 parser.addoption(
15 "--integration",
16 action="store_true",
17 default=False,
18 help="run integration tests",
19 )
20
21
22 def pytest_configure(config):
23 config.addinivalue_line(
24 "markers",
25 "integration: mark test as integration test, only run with --integration",
26 )
27
28
29 def pytest_collection_modifyitems(config, items):
30 if config.getoption("--integration"):
31 # Also run integration tests
32 return
33 skip_integration = pytest.mark.skip(reason="use --integration option to run")
34 for item in items:
35 if "integration" in item.keywords:
36 item.add_marker(skip_integration)
37 ```
38 This implements a `@pytest.mark.integration` marker which I can use to mark any test that should be considered part of the integration test suite. These will be skipped by default... but will not be skipped if the `--integration` option is passed to `pytest`.
39
40 Then in the tests I can either do this:
41
42 ```python
43 @pytest.mark.integration
44 def test_integration_s3():
45 # ...
46 ```
47 Or if I have a module that only contains integration tests - `tests/test_integration.py` - I can use the following line to apply that decorator to every test in the module:
48 ```python
49 import pytest
50
51 pytestmark = pytest.mark.integration
52
53 def test_integration_s3():
54 # ...
55 ```
til/pytest/mocking-boto.md
1 # Mocking a Textract LimitExceededException with boto
2
3 For [s3-ocr issue #21](https://github.com/simonw/s3-ocr/issues/21) I needed to write a test that simulates what happens when Amazon Textract returns a "LimitExceededException". When using boto this error presents itself as an exception:
4
5 > `botocore.errorfactory.LimitExceededException: An error occurred (LimitExceededException) when calling the StartDocumentTextDetection operation: Open jobs exceed maximum concurrent job limit`
6
7 I use [moto](https://github.com/spulec/moto) to simulate AWS in that test suite, but moto does not yet have a mechanism for simulating Textract errors like this one.
8
9 I ended up turning to Python mocks, here provided by the [pytest-mock](https://pypi.org/project/pytest-mock/) fixture. Here's the test I came up with:
10
11 ```python
12 def test_limit_exceeded_automatic_retry(s3, mocker):
13 mocked = mocker.patch("s3_ocr.cli.start_document_text_extraction")
14 # It's going to fail the first time, then succeed
33 ```python
34 def start_document_text_extraction(textract, **kwargs):
35 # Wrapper function to make this easier to mock in tests
36 return textract.start_document_text_detection(**kwargs)
37 ```
38 The most confusing thing about working with Python mocks is figuring out the string to use to mock the right piece of code. I like this pattern of refactoring the code under test to make it as simple to mock as possible.
39
40 The code I am testing here implements automatic retries. As such, I needed the API method I am simulating to fail the first time and then succeed the second time.
41
42 Originally I had done this with a `side_effect()` function - see below - but then [@szotten on Twitter](https://twitter.com/szotten/status/1556337221258575873) pointed out that you can instead set `mock.side_effect` to a list and it will [cycle through those items in turn](https://docs.python.org/3/library/unittest.mock.html#unittest.mock.Mock.side_effect):
43
44 ```python
til/pytest/mock-httpx.md
1 # How to mock httpx using pytest-mock
2
3 I wrote this test to exercise some [httpx](https://pypi.org/project/httpx/) code today, using [pytest-mock](https://pypi.org/project/pytest-mock/).
4
5 The key was to use `mocker.patch.object(cli, "httpx")` which patches the `httpx` module that was imported by the `cli` module.
6
7 Here the `mocker` function argument is a fixture that is provided by `pytest-mock`.
8
9 ```python
10 from conditional_get import cli
11 from click.testing import CliRunner
12
13
14 def test_performs_conditional_get(mocker):
15 m = mocker.patch.object(cli, "httpx")
16 m.get.return_value = mocker.Mock()
39 )
40 ```
41 https://github.com/simonw/conditional-get/blob/485fab46f01edd99818b829e99765ed9ce0978b5/tests/test_cli.py
42
43 ## Mocking a JSON response
45 Here's a mock for a GraphQL POST request that returns JSON:
46 ```python
47 @pytest.fixture
48 def mock_graphql_region(mocker):
49 m = mocker.patch("datasette_publish_fly.httpx")
52 m.post.return_value.json.return_value = {"data": {"nearestRegion": {"code": "sjc"}}}
53 ```
54 https://github.com/simonw/datasette-publish-fly/blob/5253220bded001e94561e215d553f352838e7a1c/tests/test_publish_fly.py#L16-L21
55
56 ## Mocking httpx.stream
67 https://stackoverflow.com/a/6112456 helped me figure out the following:
68 ```python
69 def test_performs_conditional_get(mocker):
70 m = mocker.patch.object(cli, "httpx")
71 m.stream.return_value.__enter__.return_value = mocker.Mock()
75 ]
76 ```
77 https://github.com/simonw/conditional-get/blob/80454f972d39e2b418572d7938146830fab98fa6/tests/test_cli.py
78
79 ## Mocking an HTTP error triggered by response.raise_for_status()
81 The `response.raise_for_status()` raises an exception if an HTTP error (e.g. a 404 or 500) occurred.
82
83 Here's how I [mocked that to return an error](https://github.com/simonw/airtable-to-yaml/blob/ebd94b2e29d6f2ec3dc64d161495a759330027e8/tests/test_airtable_to_yaml.py#L43-L56):
84
85 ```python
86 def test_airtable_to_yaml_error(mocker):
87 m = mocker.patch.object(cli, "httpx")
88 m.get.return_value = mocker.Mock()
til/pytest/coverage-with-context.md
1 # pytest coverage with context
2
3 [This tweet](https://twitter.com/mariatta/status/1499863816489734146) from \@Mariatta tipped me off to the ability to measure "contexts" when [running coverage](https://coverage.readthedocs.io/en/6.3.2/contexts.html#context-reporting) - as a way to tell which tests exercise which specific lines of code.
4
5 My [sqlite-utils](https://github.com/simonw/sqlite-utils) project uses `pytest` for the test suite. I decided to figure out how to get this working with [pytest-cov](https://pypi.org/project/pytest-cov/).
6
7 After some experimentation, this is the recipe that worked for me:
8
9 ```
10 # In the virtual environment, make sure pytest-cov is installed:
11 % pip install pytest-cov
12 # First, run pytest to calculate coverage of the `sqlite_utils` package, with context
13 % pytest --cov=sqlite_utils --cov-context=test
14 # The .coverage file is actually a SQLite database:
15 % ls -lah .coverage
23 Here's what one of the pages looks like, displaying the context for some lines of code:
24
25 ![The code has an expandable section which reveals which tests executed each individual line.](https://user-images.githubusercontent.com/9599/156860441-66e35994-653a-4ab7-b690-4d901fc57750.png)
26
27 ## The .coverage schema
til/pytest/async-fixtures.md
1 # Async fixtures with pytest-asyncio
2
3 I wanted to use a fixture with `pytest-asyncio` that was itself an `async def` function, so that it could execute `await` statements.
4
5 Since I'm using a `pytest.ini` file containing `asyncio_mode = strict` I had to use the `@pytest_asyncio.fixture` fixture to get this to work. Without that fixture I got this error:
6
7 ```
9 E AssertionError: assert False
10 E + where False = _has_explicit_asyncio_mark(<function ds_with_route at 0x11332d2d0>)
11 E + where <function ds_with_route at 0x11332d2d0> = <FixtureDef argname='ds_with_route' scope='function' baseid='tests/test_routes.py'>.func
12 ```
13
14 Swapping `@pytest.fixture` for `@pytest_asyncio.fixture` fixed this problem:
15
16 ```python
17 import pytest_asyncio
18
19 @pytest_asyncio.fixture
20 async def ds_with_route():
21 ds = Datasette()
til/pypi/pypi-releases-from-github.md
53 ## Configuring the workflow
54
55 This took me the most time to figure out. I already have a `publish.yml` workflow I use for my other projects, which uses `twine` and a PyPI token to upload packages, after first running the tests.
56
57 Here's [the workflow](https://github.com/datasette/datasette-build/blob/main/.github/workflows/publish.yml) I eventually landed, in `.github/workflows/publish.yml`:
65
66 jobs:
67 test:
68 runs-on: ubuntu-latest
69 strategy:
70 matrix:
80 - name: Install dependencies
81 run: |
82 pip install -e '.[test]'
83 - name: Run tests
84 run: |
85 pytest
86 deploy:
87 runs-on: ubuntu-latest
88 needs: [test]
89 environment: release
90 permissions:
107 uses: pypa/gh-action-pypi-publish@release/v1
108 ```
109 The `test` job is pretty standard - it sets up a matrix to run the tests against multiple Python versions, then runs `pytest`.
110
111 It's set to trigger by this block:
123 ```yaml
124 deploy: