From 4d4193ff497378cb85bebae4630c992dda4b9d1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tao=20Bojl=C3=A9n?= <66130243+taobojlen@users.noreply.github.com>
Date: Sat, 10 Jun 2023 09:36:03 +0100
Subject: [PATCH] fix crawls of well-connected instances

---
 .github/workflows/deploy.yml           |  2 +-
 .gitignore                             |  1 +
 backend/lib/backend/crawler/crawler.ex | 12 ++++++++----
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index a7bf9ad..aeac7cd 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -35,7 +35,7 @@ jobs:
       - name: Build & push
         uses: depot/build-push-action@v1
         with:
-          project: 3tz1wxj8cr
+          project: rktsv8c4sk
           context: backend
           push: true
           tags: ${{ steps.meta.outputs.tags }}
diff --git a/.gitignore b/.gitignore
index 10a1db2..df06164 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ data/
 *.class
 
 backend/.sobelow
+node_modules/
 
 # Environments
 .env
diff --git a/backend/lib/backend/crawler/crawler.ex b/backend/lib/backend/crawler/crawler.ex
index 51d359b..fb21015 100644
--- a/backend/lib/backend/crawler/crawler.ex
+++ b/backend/lib/backend/crawler/crawler.ex
@@ -235,9 +235,12 @@ defmodule Backend.Crawler do
         Enum.map(result.federation_restrictions, fn {domain, _restriction_type} -> domain end)
       )
       |> Enum.map(&%{domain: &1, inserted_at: now, updated_at: now, next_crawl: now})
+      |> Enum.chunk_every(5000)
 
-    Instance
-    |> Repo.insert_all(new_instances, on_conflict: :nothing, conflict_target: :domain)
+    new_instances
+    |> Enum.each(fn chunk ->
+      Repo.insert_all(Instance, chunk, on_conflict: :nothing, conflict_target: :domain)
+    end)
 
     Repo.transaction(fn ->
       ## Save peer relationships ##
@@ -274,9 +277,10 @@ defmodule Backend.Crawler do
             updated_at: now
           }
         )
+        |> Enum.chunk_every(5000)
 
-      InstancePeer
-      |> Repo.insert_all(new_instance_peers)
+      new_instance_peers
+      |> Enum.each(fn chunk -> Repo.insert_all(InstancePeer, chunk) end)
     end)
 
     ## Save federation restrictions ##