comeetie 2022-11-09 17:49:04 +01:00
rodzic c408a1bb2b
commit 491c24aa68
33 zmienionych plików z 614137 dodań i 0 usunięć

Wyświetl plik

@ -0,0 +1,5 @@
{
"debugBreakpointsState": {
"breakpoints": []
}
}

Wyświetl plik

@ -0,0 +1,9 @@
{
"sortOrder": [
{
"columnIndex": 2,
"ascending": true
}
],
"path": "~/Projets/mapstodon"
}

Wyświetl plik

@ -0,0 +1,3 @@
{
"activeTab": 0
}

Wyświetl plik

@ -0,0 +1,14 @@
{
"left": {
"splitterpos": 444,
"topwindowstate": "NORMAL",
"panelheight": 1074,
"windowheight": 1112
},
"right": {
"splitterpos": 667,
"topwindowstate": "NORMAL",
"panelheight": 1074,
"windowheight": 1112
}
}

Wyświetl plik

@ -0,0 +1,5 @@
{
"TabSet1": 0,
"TabSet2": 2,
"TabZoom": {}
}

Wyświetl plik

@ -0,0 +1,8 @@
build-last-errors="[]"
build-last-errors-base-dir=""
build-last-outputs="[]"
compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
imageDirtyState="1"
saveActionState="-1"

Wyświetl plik

@ -0,0 +1,5 @@

Wyświetl plik

@ -0,0 +1 @@
{"active_set":"","sets":[]}

Wyświetl plik

@ -0,0 +1,7 @@
{
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "25,83",
"scrollLine": "0"
}

Wyświetl plik

@ -0,0 +1,7 @@
{
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "77,13",
"scrollLine": "57"
}

Wyświetl plik

@ -0,0 +1,2 @@
~%2FProjets%2Fmapstodon%2FscrData.R="E46C5F41"
~%2FProjets%2Fmapstodon%2FscrPlot.R="21CE1D26"

Wyświetl plik

@ -0,0 +1,27 @@
{
"id": "632B0BBB",
"path": "~/Projets/mapstodon/scrData.R",
"project_path": "scrData.R",
"type": "r_source",
"hash": "0",
"contents": "",
"dirty": true,
"created": 1667917274243.0,
"source_on_save": false,
"relative_order": 1,
"properties": {
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "77,13",
"scrollLine": "57"
},
"folds": "",
"lastKnownWriteTime": 1668007721,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1668008010790,
"read_only": false,
"read_only_alternatives": []
}

Wyświetl plik

@ -0,0 +1,91 @@
library(httr)
library(dplyr)
library(stringr)
# Crawl the public timeline of every peer of mapstodon.space (plus
# mapstodon.space itself) and count, per crawled instance, which home
# instances and which languages the collected toots come from.
# Results: `Links` and `Lang`, two lists keyed by peer name; every element is
# a data frame with the columns instance / from / nb_toots.

# Count the occurrences of each distinct value (table() drops NA) and return
# one row per value, tagged with the crawled instance. Returns a zero-row data
# frame when there is nothing to count.
count_toots_by <- function(peer, values) {
  counts <- table(as.character(values))
  data.frame(
    instance = rep(peer, length(counts)),
    from = as.character(names(counts)),
    nb_toots = as.numeric(counts)
  )
}

peers <- jsonlite::fromJSON("https://mapstodon.space/api/v1/instance/peers")
peers <- c(peers, "mapstodon.space")
Links <- list()
Lang <- list()
for (peer in peers) {
  ur <- paste0("https://", peer, "/api/v1/timelines/public?limit=25")
  next_url <- ur
  instances <- c()
  languages <- c()
  # At most 20 pages of 25 toots per peer.
  for (pages in 1:20) {
    print(next_url)
    # Best effort: a peer that is down, answers with an HTTP error or returns
    # something that is not JSON is reported and abandoned, not retried.
    page <- tryCatch(
      {
        response <- GET(next_url)
        if (http_error(response)) {
          stop("HTTP status ", status_code(response), call. = FALSE)
        }
        toots <- jsonlite::fromJSON(
          content(response, "text", encoding = "UTF-8")
        )
        list(
          instances = toots$account$url |>
            str_remove("/@.*") |>
            str_remove("https://"),
          languages = toots$language,
          link = headers(response)$link
        )
      },
      error = function(e) {
        message("Skipping ", next_url, ": ", conditionMessage(e))
        NULL
      }
    )
    if (is.null(page)) {
      break
    }
    instances <- c(instances, page$instances)
    languages <- c(languages, page$languages)

    # The cursor of the next page is the max_id found in the Link header.
    # Without it there is nothing more to fetch; building a URL with an empty
    # max_id would only request (and count) the same toots again.
    link <- page$link
    if (is.null(link)) {
      break
    }
    max_id <- str_extract(link, "max_id=[0123456789]+") |>
      str_remove("max_id=")
    if (length(max_id) != 1 || is.na(max_id)) {
      break
    }
    next_url <- paste0(ur, "&max_id=", max_id)
    Sys.sleep(0.2)
  }
  # Store both tables together so that Links and Lang always hold the same
  # peers in the same order; peers that returned nothing are left out.
  if (length(instances) > 0) {
    Links[[peer]] <- count_toots_by(peer, instances)
    Lang[[peer]] <- count_toots_by(peer, languages)
  }
}
# Keep only the peers for which more than 400 toots were collected, then dump
# the raw link and language counts to CSV.
# vapply() keeps the result a (named) numeric vector even when Links is empty.
seems_ok <- vapply(Links, \(df) sum(df$nb_toots), numeric(1)) > 400
# Share of the peers that delivered enough data.
sum(seems_ok) / length(peers)
Links_clean <- Links[seems_ok]
# Select the language tables by peer name rather than with the logical mask:
# the mask is positional on Links, and Lang may lack some peers (the language
# table can fail to build for a peer whose link table was stored).
Lang_clean <- Lang[intersect(names(Links_clean), names(Lang))]
Links.df <- do.call(rbind, Links_clean)
Lang.df <- do.call(rbind, Lang_clean)
library(readr)
write_csv(Links.df, "Links_raw.csv")
write_csv(Lang.df, "Langs_raw.csv")
# Cleaning ----
# Strip anything following the host name in `from`, keep only the links whose
# source is itself one of the crawled instances, and add up the toot counts
# per (instance, from) pair.
instances <- unique(Links.df$instance)
Links.cl <- Links.df |>
  mutate(from = str_remove(from, "/.*")) |>
  filter(from %in% instances) |>
  group_by(instance, from) |>
  summarise(nb_toots = sum(nb_toots)) |>
  ungroup()

# Share of each source among the toots seen on an instance
# (n = total number of toots kept for that instance).
Links.probs <- Links.cl |>
  add_count(instance, wt = nb_toots) |>
  mutate(p = nb_toots / n) |>
  arrange(desc(p))

# Average share of each source over all the instances ----
Nblink <- sum(Links.cl$nb_toots)
Links.probs.average <- Links.cl |>
  count(from, wt = nb_toots, name = "nb_toots_total") |>
  mutate(paverage = nb_toots_total / Nblink) |>
  arrange(desc(paverage))

# Filtering ----
# Keep a link when the source is more than 5 times as frequent on the instance
# as it is on average, and when it is backed by more than 10 toots.
Links.filtered <- Links.probs |>
  left_join(Links.probs.average, by = "from") |>
  mutate(logratio = log(p / paverage)) |>
  filter(logratio > log(5), nb_toots > 10)
write_csv(Links.filtered, "Links_filtered.csv")
# Individuals ----
# Account-level queries on mapstodon.space through the rtoot client.
library(rtoot)

# Token of type "user" for mapstodon.space.
token <- auth_setup("mapstodon.space", "user")

# Local profile directory of the instance, followed by the headers attached
# to the result.
directory <- get_instance_directory(
  "mapstodon.space",
  local = TRUE,
  limit = 500
)
attr(directory, "headers")

# Followers of account 109302019656613226, followed by the headers attached
# to the result.
folowers <- get_account_followers(
  "109302019656613226",
  token = token,
  limit = 200
)
attr(folowers, "headers")

# Accounts followed by that same account.
folowing <- get_account_following(
  "109302019656613226",
  token = token,
  limit = 200
)

Wyświetl plik

@ -0,0 +1,27 @@
{
"id": "8D711DAA",
"path": "~/Projets/mapstodon/scrPlot.R",
"project_path": "scrPlot.R",
"type": "r_source",
"hash": "0",
"contents": "",
"dirty": false,
"created": 1668002263018.0,
"source_on_save": false,
"relative_order": 2,
"properties": {
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "25,83",
"scrollLine": "0"
},
"folds": "",
"lastKnownWriteTime": 1668007729,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1668007729854,
"read_only": false,
"read_only_alternatives": []
}

Wyświetl plik

@ -0,0 +1,33 @@
# Plot the network stored in graph.json: one point per node, sized by
# in-degree and coloured by Louvain community, then the same map with
# mapstodon.space circled and the most linked nodes labelled.
library(ggplot2)
library(dplyr)

graph <- jsonlite::fromJSON("graph.json")
class(graph$nodes)

# Point layer shared by both maps; it reads the nested `attributes` columns
# of the plot data.
node_points <- function() {
  geom_point(
    aes(
      x = attributes$x,
      y = attributes$y,
      size = attributes$`nansi-indegree`,
      color = factor(attributes$`nansi-louvain`)
    )
  )
}

# Scales and theme shared by both maps: area-proportional sizes, Set3 palette,
# no legend, no axes.
map_styling <- function() {
  list(
    scale_size_area("", guide = "none", max_size = 3),
    scale_color_brewer(palette = "Set3", guide = "none"),
    theme_void()
  )
}

# First map: all the nodes.
ggplot(graph$nodes) +
  node_points() +
  map_styling()

# Three nodes with the highest in-degree in each community.
graph$nodes |>
  group_by(attributes$`nansi-louvain`) |>
  top_n(3, attributes$`nansi-indegree`) |>
  transmute(
    indeg = attributes$`nansi-indegree`,
    com = attributes$`nansi-louvain`,
    lab = attributes$`label`
  ) |>
  arrange(com) |>
  View()

# Node of mapstodon.space itself.
mapstodon <- graph$nodes |>
  filter(attributes$label == "mapstodon.space")

# Labelled nodes: in-degree of at least 20 and a non-missing label.
labels <- graph$nodes |>
  transmute(
    label = attributes$label,
    x = attributes$x,
    y = attributes$y,
    indeg = attributes$`nansi-indegree`
  ) |>
  arrange(desc(indeg)) |>
  filter(indeg >= 20, !is.na(label))

# Second map: same points, a red circle around mapstodon.space and the labels.
ggplot(graph$nodes) +
  node_points() +
  geom_point(
    data = mapstodon,
    aes(
      x = attributes$x,
      y = attributes$y,
      size = 5 * attributes$`nansi-indegree`
    ),
    shape = 1,
    color = "red"
  ) +
  geom_text(data = labels, aes(x = x, y = y, label = label), size = 1.5) +
  map_styling()

Wyświetl plik

@ -0,0 +1,2 @@
/home/comeetie/Projets/mapstodon/scrData.R="812016DC"
/home/comeetie/Projets/mapstodon/scrPlot.R="54314C63"

13275
Langs_raw.csv 100644

Plik diff jest za duży Load Diff

6852
Links_filtered.csv 100644

Plik diff jest za duży Load Diff

226187
Links_raw.csv 100644

Plik diff jest za duży Load Diff

117898
Network(1).gexf 100644

Plik diff jest za duży Load Diff

64618
Network(2).gexf 100644

Plik diff jest za duży Load Diff

73306
Network(3).gexf 100644

Plik diff jest za duży Load Diff

BIN
first_try.pdf 100644

Plik binarny nie jest wyświetlany.

111622
graph.json 100644

Plik diff jest za duży Load Diff

BIN
graph.png 100644

Plik binarny nie jest wyświetlany.

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 10 MiB

1
graph.svg 100644

File diff suppressed because one or more lines are too long

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 1.3 MiB

13
mapstodon.Rproj 100644
Wyświetl plik

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX

86
scrData.R 100644
Wyświetl plik

@ -0,0 +1,86 @@
library(httr)
library(dplyr)
library(stringr)
# Crawl the public timeline of every peer of mapstodon.space (plus
# mapstodon.space itself) and count, per crawled instance, which home
# instances and which languages the collected toots come from.
# Results: `Links` and `Lang`, two lists keyed by peer name; every element is
# a data frame with the columns instance / from / nb_toots.

# Count the occurrences of each distinct value (table() drops NA) and return
# one row per value, tagged with the crawled instance. Returns a zero-row data
# frame when there is nothing to count.
count_toots_by <- function(peer, values) {
  counts <- table(as.character(values))
  data.frame(
    instance = rep(peer, length(counts)),
    from = as.character(names(counts)),
    nb_toots = as.numeric(counts)
  )
}

peers <- jsonlite::fromJSON("https://mapstodon.space/api/v1/instance/peers")
peers <- c(peers, "mapstodon.space")
Links <- list()
Lang <- list()
for (peer in peers) {
  ur <- paste0("https://", peer, "/api/v1/timelines/public?limit=25")
  next_url <- ur
  instances <- c()
  languages <- c()
  # At most 20 pages of 25 toots per peer.
  for (pages in 1:20) {
    print(next_url)
    # Best effort: a peer that is down, answers with an HTTP error or returns
    # something that is not JSON is reported and abandoned, not retried.
    page <- tryCatch(
      {
        response <- GET(next_url)
        if (http_error(response)) {
          stop("HTTP status ", status_code(response), call. = FALSE)
        }
        toots <- jsonlite::fromJSON(
          content(response, "text", encoding = "UTF-8")
        )
        list(
          instances = toots$account$url |>
            str_remove("/@.*") |>
            str_remove("https://"),
          languages = toots$language,
          link = headers(response)$link
        )
      },
      error = function(e) {
        message("Skipping ", next_url, ": ", conditionMessage(e))
        NULL
      }
    )
    if (is.null(page)) {
      break
    }
    instances <- c(instances, page$instances)
    languages <- c(languages, page$languages)

    # The cursor of the next page is the max_id found in the Link header.
    # Without it there is nothing more to fetch; building a URL with an empty
    # max_id would only request (and count) the same toots again.
    link <- page$link
    if (is.null(link)) {
      break
    }
    max_id <- str_extract(link, "max_id=[0123456789]+") |>
      str_remove("max_id=")
    if (length(max_id) != 1 || is.na(max_id)) {
      break
    }
    next_url <- paste0(ur, "&max_id=", max_id)
    Sys.sleep(0.2)
  }
  # Store both tables together so that Links and Lang always hold the same
  # peers in the same order; peers that returned nothing are left out.
  if (length(instances) > 0) {
    Links[[peer]] <- count_toots_by(peer, instances)
    Lang[[peer]] <- count_toots_by(peer, languages)
  }
}
# Keep only the peers for which more than 400 toots were collected, then dump
# the raw link and language counts to CSV.
# vapply() keeps the result a (named) numeric vector even when Links is empty.
seems_ok <- vapply(Links, \(df) sum(df$nb_toots), numeric(1)) > 400
# Share of the peers that delivered enough data.
sum(seems_ok) / length(peers)
Links_clean <- Links[seems_ok]
# Select the language tables by peer name rather than with the logical mask:
# the mask is positional on Links, and Lang may lack some peers (the language
# table can fail to build for a peer whose link table was stored).
Lang_clean <- Lang[intersect(names(Links_clean), names(Lang))]
Links.df <- do.call(rbind, Links_clean)
Lang.df <- do.call(rbind, Lang_clean)
library(readr)
write_csv(Links.df, "Links_raw.csv")
write_csv(Lang.df, "Langs_raw.csv")
# Cleaning ----
# Strip anything following the host name in `from`, keep only the links whose
# source is itself one of the crawled instances, and add up the toot counts
# per (instance, from) pair.
instances <- unique(Links.df$instance)
Links.cl <- Links.df |>
  mutate(from = str_remove(from, "/.*")) |>
  filter(from %in% instances) |>
  group_by(instance, from) |>
  summarise(nb_toots = sum(nb_toots)) |>
  ungroup()

# Share of each source among the toots seen on an instance
# (n = total number of toots kept for that instance).
Links.probs <- Links.cl |>
  add_count(instance, wt = nb_toots) |>
  mutate(p = nb_toots / n) |>
  arrange(desc(p))

# Average share of each source over all the instances ----
Nblink <- sum(Links.cl$nb_toots)
Links.probs.average <- Links.cl |>
  count(from, wt = nb_toots, name = "nb_toots_total") |>
  mutate(paverage = nb_toots_total / Nblink) |>
  arrange(desc(paverage))

# Filtering ----
# Keep a link when the source is more than 5 times as frequent on the instance
# as it is on average, and when it is backed by more than 10 toots.
Links.filtered <- Links.probs |>
  left_join(Links.probs.average, by = "from") |>
  mutate(logratio = log(p / paverage)) |>
  filter(logratio > log(5), nb_toots > 10)
write_csv(Links.filtered, "Links_filtered.csv")
# Individuals ----
# Account-level queries on mapstodon.space through the rtoot client.
library(rtoot)

# Token of type "user" for mapstodon.space.
token <- auth_setup("mapstodon.space", "user")

# Followers of account 109302019656613226, followed by the headers attached
# to the result.
folowers <- get_account_followers(
  "109302019656613226",
  token = token,
  limit = 200
)
attr(folowers, "headers")

# Accounts followed by that same account.
folowing <- get_account_following(
  "109302019656613226",
  token = token,
  limit = 200
)

33
scrPlot.R 100644
Wyświetl plik

@ -0,0 +1,33 @@
# Plot the network stored in graph.json: one point per node, sized by
# in-degree and coloured by Louvain community, then the same map with
# mapstodon.space circled and the most linked nodes labelled.
library(ggplot2)
library(dplyr)

graph <- jsonlite::fromJSON("graph.json")
class(graph$nodes)

# Point layer shared by both maps; it reads the nested `attributes` columns
# of the plot data.
node_points <- function() {
  geom_point(
    aes(
      x = attributes$x,
      y = attributes$y,
      size = attributes$`nansi-indegree`,
      color = factor(attributes$`nansi-louvain`)
    )
  )
}

# Scales and theme shared by both maps: area-proportional sizes, Set3 palette,
# no legend, no axes.
map_styling <- function() {
  list(
    scale_size_area("", guide = "none", max_size = 3),
    scale_color_brewer(palette = "Set3", guide = "none"),
    theme_void()
  )
}

# First map: all the nodes.
ggplot(graph$nodes) +
  node_points() +
  map_styling()

# Three nodes with the highest in-degree in each community.
graph$nodes |>
  group_by(attributes$`nansi-louvain`) |>
  top_n(3, attributes$`nansi-indegree`) |>
  transmute(
    indeg = attributes$`nansi-indegree`,
    com = attributes$`nansi-louvain`,
    lab = attributes$`label`
  ) |>
  arrange(com) |>
  View()

# Node of mapstodon.space itself.
mapstodon <- graph$nodes |>
  filter(attributes$label == "mapstodon.space")

# Labelled nodes: in-degree of at least 20 and a non-missing label.
labels <- graph$nodes |>
  transmute(
    label = attributes$label,
    x = attributes$x,
    y = attributes$y,
    indeg = attributes$`nansi-indegree`
  ) |>
  arrange(desc(indeg)) |>
  filter(indeg >= 20, !is.na(label))

# Second map: same points, a red circle around mapstodon.space and the labels.
ggplot(graph$nodes) +
  node_points() +
  geom_point(
    data = mapstodon,
    aes(
      x = attributes$x,
      y = attributes$y,
      size = 5 * attributes$`nansi-indegree`
    ),
    shape = 1,
    color = "red"
  ) +
  geom_text(data = labels, aes(x = x, y = y, label = label), size = 1.5) +
  map_styling()

BIN
second_try.pdf 100644

Plik binarny nie jest wyświetlany.