add mastodon tests, handle auth'd timelines

main
Tao Bojlén 2023-06-10 20:18:09 +01:00
rodzic 8f4193e43f
commit a4eaf75c70
5 zmienionych plików z 334 dodań i 11 usunięć

Wyświetl plik

@ -14,6 +14,9 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
# We might already know that this is a Pleroma instance from nodeinfo
if result != nil do
cond do
# for pleroma and smithereen, the instance_type will get overwritten
# with the correct value -- but we still want to return true here
# since they are compatible with the mastodon API
Map.get(result, :instance_type) == :pleroma -> true
Map.get(result, :instance_type) == :smithereen -> true
Map.get(result, :instance_type) == :mastodon -> true
@ -97,16 +100,7 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
interactions \\ %{},
statuses_seen \\ 0
) do
# If `statuses_seen == 0`, it's the first call of this function, which means we want to query the database for the
# most recent status we have.
min_timestamp =
if statuses_seen == 0 do
get_last_crawl_timestamp(domain)
else
min_timestamp
end
endpoint = "https://#{domain}/api/v1/timelines/public?local=true"
endpoint = "https://#{domain}/api/v1/timelines/public?local=true&limit=40"
endpoint =
if max_id do
@ -117,7 +111,26 @@ defmodule Backend.Crawler.Crawlers.Mastodon do
Logger.debug("Crawling #{endpoint}")
statuses = http_client().get_and_decode!(endpoint)
case http_client().get_and_decode(endpoint) do
{:ok, statuses} ->
handle_statuses(statuses, domain, min_timestamp, interactions, statuses_seen)
# if there's an error (e.g. because the timeline prevents unauthenticated access)
# then stop here
{:error, _} ->
{interactions, statuses_seen}
end
end
defp handle_statuses(statuses, domain, min_timestamp, interactions, statuses_seen) do
# If `statuses_seen == 0`, it's the first call of this function, which means we want to query the database for the
# most recent status we have.
min_timestamp =
if statuses_seen == 0 do
get_last_crawl_timestamp(domain)
else
min_timestamp
end
filtered_statuses =
statuses

Wyświetl plik

@ -0,0 +1,117 @@
defmodule Backend.Crawler.Crawlers.MastodonTest do
  use Backend.DataCase

  import Mox

  alias Backend.Crawler.ApiCrawler
  alias Backend.Crawler.Crawlers.Mastodon
  alias Backend.HttpBehaviour

  # Endpoints the tests expect the crawler to request. They are used as
  # patterns in the mock callbacks, so a request to any other URL fails the
  # callback's clause match instead of being silently answered.
  @instance_url "https://example.com/api/v1/instance"
  @peers_url "https://example.com/api/v1/instance/peers"
  @timeline_url "https://example.com/api/v1/timelines/public?local=true&limit=40"
  # `max_id=123` corresponds to the `id` of the status in the timeline fixture.
  @timeline_page_url "https://example.com/api/v1/timelines/public?local=true&limit=40&max_id=123"

  # Every expectation registered in a test must have been consumed when it exits.
  setup :verify_on_exit!

  describe "is_instance_type?/2" do
    test "returns true for pleroma and smithereen" do
      for type <- [:pleroma, :smithereen] do
        assert Mastodon.is_instance_type?("example.com", %{instance_type: type})
      end
    end

    test "returns true for mastodon instance" do
      expect(HttpMock, :get_and_decode, fn @instance_url ->
        {:ok, TestHelpers.load_json("mastodon/instance.json")}
      end)

      assert Mastodon.is_instance_type?("example.com", nil)
    end
  end

  describe "crawl/2" do
    test "does nothing for small instances" do
      # Replace the fixture's stats so the instance reports a single user.
      expect(HttpMock, :get_and_decode!, fn @instance_url ->
        "mastodon/instance.json"
        |> TestHelpers.load_json()
        |> Map.put("stats", %{"user_count" => 1})
      end)

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())
      expected = Map.merge(ApiCrawler.get_default(), %{instance_type: :mastodon, user_count: 1})

      assert result == expected
    end

    test "crawls large instance" do
      expect_instance_fetch()
      expect_peers_fetch()

      # First page of the local timeline, requested without `max_id`.
      expect(HttpMock, :get_and_decode, fn @timeline_url ->
        {:ok, TestHelpers.load_json("mastodon/timeline.json")}
      end)

      # Four follow-up pages, each returning the same fixture.
      expect(HttpMock, :get_and_decode, 4, fn @timeline_page_url ->
        {:ok, TestHelpers.load_json("mastodon/timeline.json")}
      end)

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())

      assert result == expected_crawl_result(5)
    end

    test "handles timelines that require auth" do
      expect_instance_fetch()
      expect_peers_fetch()

      # The timeline endpoint rejects the unauthenticated request.
      expect(HttpMock, :get_and_decode, fn @timeline_url ->
        auth_error = %HttpBehaviour.Error{
          message: "HTTP request failed with status code 422",
          status_code: 422,
          body: "{\"error\":\"This method requires an authenticated user\"}"
        }

        {:error, auth_error}
      end)

      result = Mastodon.crawl("example.com", ApiCrawler.get_default())

      assert result == expected_crawl_result(0)
    end
  end

  # Expects one raising-variant request for the instance metadata and answers
  # it with the unmodified instance fixture.
  defp expect_instance_fetch do
    expect(HttpMock, :get_and_decode!, fn @instance_url ->
      TestHelpers.load_json("mastodon/instance.json")
    end)
  end

  # Expects one request for the peer list and answers it with the peers fixture.
  defp expect_peers_fetch do
    expect(HttpMock, :get_and_decode, fn @peers_url ->
      {:ok, TestHelpers.load_json("mastodon/peers.json")}
    end)
  end

  # The crawl result both full-crawl tests assert on; only the number of
  # statuses seen differs between them.
  defp expected_crawl_result(statuses_seen) do
    %{
      description: "long description",
      federation_restrictions: [],
      instance_type: :mastodon,
      interactions: %{},
      peers: ["other.com"],
      user_count: 100,
      status_count: 100,
      statuses_seen: statuses_seen,
      version: "1.2.3"
    }
  end
end

Wyświetl plik

@ -0,0 +1,137 @@
{
"uri": "mastodon.social",
"title": "Mastodon",
"short_description": "short description",
"description": "long description",
"email": "staff@mastodon.social",
"version": "1.2.3",
"urls": {
"streaming_api": "wss://streaming.mastodon.social"
},
"stats": {
"user_count": 100,
"status_count": 100,
"domain_count": 55958
},
"thumbnail": "https://files.mastodon.social/site_uploads/files/000/000/001/@1x/57c12f441d083cde.png",
"languages": ["en"],
"registrations": true,
"approval_required": false,
"invites_enabled": true,
"configuration": {
"accounts": {
"max_featured_tags": 10
},
"statuses": {
"max_characters": 500,
"max_media_attachments": 4,
"characters_reserved_per_url": 23
},
"media_attachments": {
"supported_mime_types": [
"image/jpeg",
"image/png",
"image/gif",
"image/heic",
"image/heif",
"image/webp",
"image/avif",
"video/webm",
"video/mp4",
"video/quicktime",
"video/ogg",
"audio/wave",
"audio/wav",
"audio/x-wav",
"audio/x-pn-wave",
"audio/vnd.wave",
"audio/ogg",
"audio/vorbis",
"audio/mpeg",
"audio/mp3",
"audio/webm",
"audio/flac",
"audio/aac",
"audio/m4a",
"audio/x-m4a",
"audio/mp4",
"audio/3gpp",
"video/x-ms-asf"
],
"image_size_limit": 16777216,
"image_matrix_limit": 33177600,
"video_size_limit": 103809024,
"video_frame_rate_limit": 120,
"video_matrix_limit": 8294400
},
"polls": {
"max_options": 4,
"max_characters_per_option": 50,
"min_expiration": 300,
"max_expiration": 2629746
}
},
"contact_account": {
"id": "13179",
"username": "Mastodon",
"acct": "Mastodon",
"display_name": "Mastodon",
"locked": false,
"bot": false,
"discoverable": true,
"group": false,
"created_at": "2016-11-23T00:00:00.000Z",
"note": "<p>Official account of the Mastodon project. News, releases, announcements! Learn more on our website!</p>",
"url": "https://mastodon.social/@Mastodon",
"avatar": "https://files.mastodon.social/accounts/avatars/000/013/179/original/b4ceb19c9c54ec7e.png",
"avatar_static": "https://files.mastodon.social/accounts/avatars/000/013/179/original/b4ceb19c9c54ec7e.png",
"header": "https://files.mastodon.social/accounts/headers/000/013/179/original/878f382e7dd9fb84.png",
"header_static": "https://files.mastodon.social/accounts/headers/000/013/179/original/878f382e7dd9fb84.png",
"followers_count": 778859,
"following_count": 8,
"statuses_count": 237,
"last_status_at": "2023-05-13",
"noindex": false,
"emojis": [],
"roles": [],
"fields": [
{
"name": "Homepage",
"value": "<a href=\"https://joinmastodon.org\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">joinmastodon.org</span><span class=\"invisible\"></span></a>",
"verified_at": "2018-10-31T04:11:00.076+00:00"
},
{
"name": "Patreon",
"value": "<a href=\"https://patreon.com/mastodon\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">patreon.com/mastodon</span><span class=\"invisible\"></span></a>",
"verified_at": null
},
{
"name": "GitHub",
"value": "<a href=\"https://github.com/mastodon\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">github.com/mastodon</span><span class=\"invisible\"></span></a>",
"verified_at": null
}
]
},
"rules": [
{
"id": "1",
"text": "Sexually explicit or violent media must be marked as sensitive when posting"
},
{
"id": "2",
"text": "No racism, sexism, homophobia, transphobia, xenophobia, or casteism"
},
{
"id": "3",
"text": "No incitement of violence or promotion of violent ideologies"
},
{
"id": "4",
"text": "No harassment, dogpiling or doxxing of other users"
},
{
"id": "7",
"text": "Do not share intentionally false or misleading information"
}
]
}

Wyświetl plik

@ -0,0 +1 @@
["other.com"]

Wyświetl plik

@ -0,0 +1,55 @@
[
{
"id": "123",
"created_at": "2023-06-10T18:59:36.207Z",
"in_reply_to_id": null,
"in_reply_to_account_id": null,
"sensitive": false,
"spoiler_text": "",
"visibility": "public",
"language": "de",
"uri": "https://mastodon.social/users/someuser/statuses/110521455489577427",
"url": "https://mastodon.social/@someuser/110521455489577427",
"replies_count": 0,
"reblogs_count": 0,
"favourites_count": 0,
"edited_at": null,
"content": "<p>New post</p>",
"reblog": null,
"application": {
"name": "IFTTT",
"website": "https://www.ifttt.com"
},
"account": {
"id": "108265572384945996",
"username": "someuser",
"acct": "someuser",
"display_name": "Some User",
"locked": false,
"bot": false,
"discoverable": true,
"group": false,
"created_at": "2022-05-08T00:00:00.000Z",
"note": "<p>My account</p>",
"url": "https://mastodon.social/@someuser",
"avatar": "https://example.com/picture.jpg",
"avatar_static": "https://example.com/picture.jpg",
"header": "https://example.com/picture.jpg",
"header_static": "https://example.com/picture.jpg",
"followers_count": 7,
"following_count": 73,
"statuses_count": 256,
"last_status_at": "2023-06-10",
"noindex": false,
"emojis": [],
"roles": [],
"fields": []
},
"media_attachments": [],
"mentions": [],
"tags": [],
"emojis": [],
"card": {},
"poll": null
}
]