# datasette-atom/tests/test_atom.py
"""Tests for the ``.atom`` output format added to Datasette by datasette-atom.

Each test runs a SQL query (ad hoc or canned) against an in-memory Datasette
instance and checks the HTTP status, the content type and the Atom XML that
comes back.
"""
# NOTE(review): this module was recovered from a copy in which every newline
# and all indentation had been stripped and one level of XML entity escaping
# had been decoded.
#   * The SQL whitespace in test_atom_for_valid_query, test_atom_with_optional_link
#     and test_atom_with_bad_html is reconstructed exactly from the URL-encoded
#     ``sql=`` value embedded in the matching expected <id> (%0A = newline,
#     + = space). The other queries reuse the same layout; their assertions do
#     not depend on whitespace.
#   * The <title> text is the same SQL, so it gets the same whitespace.
#   * The re-escaped entities (&lt; / &gt; / &amp;) are what a conforming XML
#     serializer has to emit for this character data; the newline after the
#     XML declaration is presumed. Confirm both against the plugin's real output.
import datasette
from datasette.app import Datasette
import urllib.parse
import pytest

# Expected feed for test_atom_for_valid_query. ``{version}`` is filled in with
# datasette.__version__ via str.format() before comparison.
EXPECTED_ATOM = """
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns="http://www.w3.org/2005/Atom"><id>http://localhost/_memory.atom?sql=%0A++++select%0A++++++++1+as+atom_id%2C%0A++++++++123+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27blah+%3Cb%3EBold%3C%2Fb%3E%27+as+atom_content%2C%0A++++++++%27Author%27+as+atom_author_name%2C%0A++++++++%27https%3A%2F%2Fwww.example.com%2F%27+as+atom_author_uri%0A++++union+select%0A++++++++%27atom-id-2%27+as+atom_id%2C%0A++++++++%27title+2%27+as+atom_title%2C%0A++++++++%272019-09-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27blah%27+as+atom_content%2C%0A++++++++null+as+atom_author_name%2C%0A++++++++null+as+atom_author_uri%3B%0A++++</id><title>
    select
        1 as atom_id,
        123 as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'blah &lt;b&gt;Bold&lt;/b&gt;' as atom_content,
        'Author' as atom_author_name,
        'https://www.example.com/' as atom_author_uri
    union select
        'atom-id-2' as atom_id,
        'title 2' as atom_title,
        '2019-09-23T21:32:12-07:00' as atom_updated,
        'blah' as atom_content,
        null as atom_author_name,
        null as atom_author_uri;
    </title><updated>2019-10-23T21:32:12-07:00</updated><link href="http://localhost/_memory.atom?sql=%0A++++select%0A++++++++1+as+atom_id%2C%0A++++++++123+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27blah+%3Cb%3EBold%3C%2Fb%3E%27+as+atom_content%2C%0A++++++++%27Author%27+as+atom_author_name%2C%0A++++++++%27https%3A%2F%2Fwww.example.com%2F%27+as+atom_author_uri%0A++++union+select%0A++++++++%27atom-id-2%27+as+atom_id%2C%0A++++++++%27title+2%27+as+atom_title%2C%0A++++++++%272019-09-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27blah%27+as+atom_content%2C%0A++++++++null+as+atom_author_name%2C%0A++++++++null+as+atom_author_uri%3B%0A++++" rel="self"/><generator uri="https://github.com/simonw/datasette" version="{version}">Datasette</generator><entry><id>1</id><title>123</title><updated>2019-10-23T21:32:12-07:00</updated><author><name>Author</name><uri>https://www.example.com/</uri></author><content type="text">blah &lt;b&gt;Bold&lt;/b&gt;</content></entry><entry><id>atom-id-2</id><title>title 2</title><updated>2019-09-23T21:32:12-07:00</updated><content type="text">blah</content></entry></feed>
""".strip()

# Expected feed for test_atom_with_optional_link: the atom_link column becomes
# a rel="alternate" <link> on the entry.
EXPECTED_ATOM_WITH_LINK = """
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns="http://www.w3.org/2005/Atom"><id>http://localhost/_memory.atom?sql=%0A++++select%0A++++++++%27atom-id%27+as+atom_id%2C%0A++++++++%27title%27+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27https%3A%2F%2Fwww.niche-museums.com%2F%27+as+atom_link%2C%0A++++++++%27blah%27+as+atom_content%3B%0A++++</id><title>
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        'blah' as atom_content;
    </title><updated>2019-10-23T21:32:12-07:00</updated><link href="http://localhost/_memory.atom?sql=%0A++++select%0A++++++++%27atom-id%27+as+atom_id%2C%0A++++++++%27title%27+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27https%3A%2F%2Fwww.niche-museums.com%2F%27+as+atom_link%2C%0A++++++++%27blah%27+as+atom_content%3B%0A++++" rel="self"/><generator uri="https://github.com/simonw/datasette" version="{version}">Datasette</generator><entry><id>atom-id</id><title>title</title><updated>2019-10-23T21:32:12-07:00</updated><content type="text">blah</content><link href="https://www.niche-museums.com/" rel="alternate"/></entry></feed>
""".strip()

# Expected feed for test_atom_with_bad_html: <h2>/<br> survive as HTML content
# (escaped once, by XML serialization) while the <script> element is escaped
# twice (once as HTML text, once by XML serialization).
EXPECTED_ATOM_WITH_HTML = """
<?xml version='1.0' encoding='UTF-8'?>
<feed xmlns="http://www.w3.org/2005/Atom"><id>http://localhost/_memory.atom?sql=%0A++++select%0A++++++++%27atom-id%27+as+atom_id%2C%0A++++++++%27title%27+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27https%3A%2F%2Fwww.niche-museums.com%2F%27+as+atom_link%2C%0A++++++++%27%3Ch2%3Eblah%3C%2Fh2%3E%3Cbr%3E%3Cscript%3Ealert%28%22bad%22%29%3C%2Fscript%3E%27+as+atom_content_html%3B%0A++++</id><title>
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        '&lt;h2&gt;blah&lt;/h2&gt;&lt;br&gt;&lt;script&gt;alert("bad")&lt;/script&gt;' as atom_content_html;
    </title><updated>2019-10-23T21:32:12-07:00</updated><link href="http://localhost/_memory.atom?sql=%0A++++select%0A++++++++%27atom-id%27+as+atom_id%2C%0A++++++++%27title%27+as+atom_title%2C%0A++++++++%272019-10-23T21%3A32%3A12-07%3A00%27+as+atom_updated%2C%0A++++++++%27https%3A%2F%2Fwww.niche-museums.com%2F%27+as+atom_link%2C%0A++++++++%27%3Ch2%3Eblah%3C%2Fh2%3E%3Cbr%3E%3Cscript%3Ealert%28%22bad%22%29%3C%2Fscript%3E%27+as+atom_content_html%3B%0A++++" rel="self"/><generator uri="https://github.com/simonw/datasette" version="{version}">Datasette</generator><entry><id>atom-id</id><title>title</title><updated>2019-10-23T21:32:12-07:00</updated><content type="html">&lt;h2&gt;blah&lt;/h2&gt;&lt;br&gt;&amp;lt;script&amp;gt;alert("bad")&amp;lt;/script&amp;gt;</content><link href="https://www.niche-museums.com/" rel="alternate"/></entry></feed>
""".strip()


@pytest.mark.asyncio
async def test_incorrect_sql_returns_400():
    """A query without the required columns yields a 400 and an explanation."""
    ds = Datasette(immutables=[], memory=True)
    response = await ds.client.get("/_memory.atom?sql=select+sqlite_version()")
    assert 400 == response.status_code
    assert b"SQL query must return columns" in response.content


@pytest.mark.asyncio
async def test_atom_for_valid_query():
    """A two-row query (one with author columns, one with nulls) renders EXPECTED_ATOM."""
    # Whitespace matters: the expected <id>/<link> embed this exact SQL, URL-encoded.
    sql = """
    select
        1 as atom_id,
        123 as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'blah <b>Bold</b>' as atom_content,
        'Author' as atom_author_name,
        'https://www.example.com/' as atom_author_uri
    union select
        'atom-id-2' as atom_id,
        'title 2' as atom_title,
        '2019-09-23T21:32:12-07:00' as atom_updated,
        'blah' as atom_content,
        null as atom_author_name,
        null as atom_author_uri;
    """
    ds = Datasette(memory=True)
    response = await ds.client.get("/_memory.atom", params={"sql": sql})
    assert 200 == response.status_code
    assert "application/xml; charset=utf-8" == response.headers["content-type"]
    assert (
        EXPECTED_ATOM.format(version=datasette.__version__)
        == response.content.decode("utf-8").strip()
    )


@pytest.mark.asyncio
async def test_atom_with_optional_link():
    """An ``atom_link`` column is rendered as a rel="alternate" entry link."""
    # Whitespace matters: the expected <id>/<link> embed this exact SQL, URL-encoded.
    sql = """
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        'blah' as atom_content;
    """
    ds = Datasette(memory=True)
    response = await ds.client.get("/_memory.atom", params={"sql": sql})
    assert 200 == response.status_code
    assert "application/xml; charset=utf-8" == response.headers["content-type"]
    assert (
        EXPECTED_ATOM_WITH_LINK.format(version=datasette.__version__)
        == response.content.decode("utf-8").strip()
    )


@pytest.mark.asyncio
async def test_atom_with_bad_html():
    """``atom_content_html`` keeps <h2>/<br> but neutralises the <script> element."""
    # Whitespace matters: the expected <id>/<link> embed this exact SQL, URL-encoded.
    sql = """
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        '<h2>blah</h2><br><script>alert("bad")</script>' as atom_content_html;
    """
    ds = Datasette(memory=True)
    response = await ds.client.get("/_memory.atom", params={"sql": sql})
    assert 200 == response.status_code
    assert "application/xml; charset=utf-8" == response.headers["content-type"]
    assert (
        EXPECTED_ATOM_WITH_HTML.format(version=datasette.__version__)
        == response.content.decode("utf-8").strip()
    )


@pytest.mark.asyncio
async def test_atom_link_only_shown_for_correct_queries():
    """The HTML query page links to ``.atom`` only when the query has atom columns."""
    sql = """
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        '<h2>blah</h2><br><script>alert("bad")</script>' as atom_content_html;
    """
    ds = Datasette(memory=True)
    response = await ds.client.get("/_memory", params={"sql": sql})
    assert 200 == response.status_code
    assert "text/html; charset=utf-8" == response.headers["content-type"]
    assert '<a href="/_memory.atom' in response.text
    # But with a different query that link is not shown:
    response = await ds.client.get(
        "/_memory", params={"sql": "select sqlite_version()"}
    )
    assert '<a href="/_memory.json' in response.text
    assert '<a href="/_memory.atom' not in response.text


@pytest.mark.asyncio
async def test_atom_from_titled_canned_query():
    """A canned query's ``title`` metadata is used as the feed <title>."""
    sql = """
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        'blah' as atom_content;
    """
    ds = Datasette(
        memory=True,
        metadata={
            "databases": {
                "_memory": {"queries": {"feed": {"sql": sql, "title": "My atom feed"}}}
            }
        },
    )
    response = await ds.client.get("/_memory/feed.atom")
    assert 200 == response.status_code
    assert "application/xml; charset=utf-8" == response.headers["content-type"]
    xml = response.content.decode("utf-8")
    assert "<title>My atom feed</title>" in xml


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "config,should_allow",
    [
        (True, True),
        (False, False),
        ({"_memory": ["latest"]}, True),
        ({"_memory": ["notlatest"]}, False),
    ],
)
async def test_allow_unsafe_html_in_canned_queries(config, should_allow):
    """Check the ``allow_unsafe_html_in_canned_queries`` plugin setting.

    ``config`` is the setting's value: a bool, or a mapping of database name
    to a list of canned query names. ``should_allow`` says whether the
    ``latest`` canned query is expected to pass its <iframe> through as HTML.
    """
    sql = """
    select
        'atom-id' as atom_id,
        'title' as atom_title,
        '2019-10-23T21:32:12-07:00' as atom_updated,
        'https://www.niche-museums.com/' as atom_link,
        '<iframe>An iframe!</iframe>' as atom_content_html;
    """
    metadata = {
        "databases": {
            "_memory": {"queries": {"latest": {"sql": sql}}},
        },
        "plugins": {
            "datasette-atom": {"allow_unsafe_html_in_canned_queries": config}
        },
    }
    ds = Datasette(
        memory=True,
        metadata=metadata,
    )
    response = await ds.client.get("/_memory/latest.atom")
    assert 200 == response.status_code
    assert "application/xml; charset=utf-8" == response.headers["content-type"]
    if should_allow:
        # The iframe is kept as HTML, so it is escaped once (by XML serialization).
        assert (
            '<content type="html">&lt;iframe&gt;An iframe!&lt;/iframe&gt;</content>'
            in response.text
        )
    else:
        # The iframe is escaped as HTML text first, then again by XML serialization.
        assert (
            '<content type="html">&amp;lt;iframe&amp;gt;An iframe!&amp;lt;/iframe&amp;gt;</content>'
            in response.text
        )