save

2022-11-09 17:49:04 +01:00 · 2022-11-09 17:49:04 +01:00 · 491c24aa68
commit 491c24aa68
--- a/.Rproj.user/D08F4A2A/pcs/debug-breakpoints.pper
+++ b/.Rproj.user/D08F4A2A/pcs/debug-breakpoints.pper
@ -0,0 +1,5 @@
+{
+    "debugBreakpointsState": {
+        "breakpoints": []
+    }
+}
--- a/.Rproj.user/D08F4A2A/pcs/files-pane.pper
+++ b/.Rproj.user/D08F4A2A/pcs/files-pane.pper
@ -0,0 +1,9 @@
+{
+    "sortOrder": [
+        {
+            "columnIndex": 2,
+            "ascending": true
+        }
+    ],
+    "path": "~/Projets/mapstodon"
+}
--- a/.Rproj.user/D08F4A2A/pcs/source-pane.pper
+++ b/.Rproj.user/D08F4A2A/pcs/source-pane.pper
@ -0,0 +1,3 @@
+{
+    "activeTab": 0
+}
--- a/.Rproj.user/D08F4A2A/pcs/windowlayoutstate.pper
+++ b/.Rproj.user/D08F4A2A/pcs/windowlayoutstate.pper
@ -0,0 +1,14 @@
+{
+    "left": {
+        "splitterpos": 444,
+        "topwindowstate": "NORMAL",
+        "panelheight": 1074,
+        "windowheight": 1112
+    },
+    "right": {
+        "splitterpos": 667,
+        "topwindowstate": "NORMAL",
+        "panelheight": 1074,
+        "windowheight": 1112
+    }
+}
--- a/.Rproj.user/D08F4A2A/pcs/workbench-pane.pper
+++ b/.Rproj.user/D08F4A2A/pcs/workbench-pane.pper
@ -0,0 +1,5 @@
+{
+    "TabSet1": 0,
+    "TabSet2": 2,
+    "TabZoom": {}
+}
--- a/.Rproj.user/D08F4A2A/persistent-state
+++ b/.Rproj.user/D08F4A2A/persistent-state
@ -0,0 +1,8 @@
+build-last-errors="[]"
+build-last-errors-base-dir=""
+build-last-outputs="[]"
+compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
+files.monitored-path=""
+find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
+imageDirtyState="1"
+saveActionState="-1"
--- a/.Rproj.user/D08F4A2A/rmd-outputs
+++ b/.Rproj.user/D08F4A2A/rmd-outputs
@ -0,0 +1,5 @@
+
+
+
+
+
--- a/.Rproj.user/D08F4A2A/saved_source_markers
+++ b/.Rproj.user/D08F4A2A/saved_source_markers
@ -0,0 +1 @@
+{"active_set":"","sets":[]}
--- a/.Rproj.user/D08F4A2A/sources/prop/21CE1D26
+++ b/.Rproj.user/D08F4A2A/sources/prop/21CE1D26
@ -0,0 +1,7 @@
+{
+    "tempName": "Untitled1",
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "25,83",
+    "scrollLine": "0"
+}
--- a/.Rproj.user/D08F4A2A/sources/prop/E46C5F41
+++ b/.Rproj.user/D08F4A2A/sources/prop/E46C5F41
@ -0,0 +1,7 @@
+{
+    "tempName": "Untitled1",
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "77,13",
+    "scrollLine": "57"
+}
--- a/.Rproj.user/D08F4A2A/sources/prop/INDEX
+++ b/.Rproj.user/D08F4A2A/sources/prop/INDEX
@ -0,0 +1,2 @@
+~%2FProjets%2Fmapstodon%2FscrData.R="E46C5F41"
+~%2FProjets%2Fmapstodon%2FscrPlot.R="21CE1D26"
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/5A890FFC-contents
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/5A890FFC-contents
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/632B0BBB
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/632B0BBB
@ -0,0 +1,27 @@
+{
+    "id": "632B0BBB",
+    "path": "~/Projets/mapstodon/scrData.R",
+    "project_path": "scrData.R",
+    "type": "r_source",
+    "hash": "0",
+    "contents": "",
+    "dirty": true,
+    "created": 1667917274243.0,
+    "source_on_save": false,
+    "relative_order": 1,
+    "properties": {
+        "tempName": "Untitled1",
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "77,13",
+        "scrollLine": "57"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1668007721,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1668008010790,
+    "read_only": false,
+    "read_only_alternatives": []
+}
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/632B0BBB-contents
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/632B0BBB-contents
@ -0,0 +1,91 @@
+library(httr)
+library(dplyr)
+library(stringr)
+# Crawl the public timeline of mapstodon.space and of each of its peers and
+# tabulate the source instance and the language of the toots found there.
+peers = jsonlite:::fromJSON("https://mapstodon.space/api/v1/instance/peers")
+peers= c(peers,"mapstodon.space")
+Links=list()
+Lang=list()
+for(peer in peers){
+  ur <- paste0("https://",peer,"/api/v1/timelines/public?limit=25")
+  next_url = ur;
+  instances = c();
+  languages = c();
+  for(pages in 1:20){ # at most 20 requests of 25 toots per peer
+    try({ # an error on one page is reported by try() and the loop moves on
+      response = GET(next_url)
+      link = headers(response)$link # the next max_id is read from this header
+      min_id = link |> str_extract("max_id=[0123456789]+") |> str_remove("max_id=")
+      next_url = paste0(ur,"&max_id=",min_id) # NOTE(review): no check that a max_id was found - confirm
+      print(next_url)
+      jsonRespText <- content(response, "text") 
+      toots <- jsonlite::fromJSON(jsonRespText)
+      instances = c(instances,toots$account$url |> str_remove("/@.*") |> str_remove("https://"))
+      languages = c(languages,toots$language)
+    })
+    Sys.sleep(0.2) # pause between two requests
+  }
+  try({ # one row per source instance (resp. language) with its toot count
+    peers_links=table(instances)
+    Links[[peer]]=data.frame(instance=peer,from=names(peers_links),nb_toots=as.numeric(peers_links))
+    peers_langs=table(languages)
+    Lang[[peer]]=data.frame(instance=peer,from=names(peers_langs),nb_toots=as.numeric(peers_langs))
+  })
+}
+
+# Keep the peers for which more than 400 toots were counted.
+seems_ok=sapply(Links,\(df){sum(df$nb_toots)})>400
+sum(seems_ok)/length(peers) # share of the peers that are kept
+Links_clean=Links[seems_ok]
+Lang_clean=Lang[seems_ok] # NOTE(review): assumes Lang and Links hold the same peers in the same order - confirm
+
+Links.df = do.call(rbind,Links_clean)
+Lang.df = do.call(rbind,Lang_clean)
+
+library(readr)
+write_csv(Links.df,"Links_raw.csv")                                
+write_csv(Lang.df,"Langs_raw.csv")
+# Some cleaning: drop whatever follows a "/" in the source name, sum the
+# duplicates and keep the links whose source is itself a kept instance.
+instances = unique(Links.df$instance)
+Links.cl = Links.df |> mutate(from=str_remove(from,"/.*")) |>
+  group_by(instance,from)|>
+  summarise(nb_toots=sum(nb_toots)) |>
+  filter(from %in% instances) |>
+  ungroup()
+# Share of each source within the toots kept for an instance.
+Links.probs=  Links.cl |> 
+  add_count(instance,wt = nb_toots) |>
+  mutate(p=nb_toots/n) |>
+  arrange(desc(p)) 
+
+# Share of each source over all the kept links.
+Nblink = sum(Links.cl$nb_toots)
+Links.probs.average = Links.cl |>
+  count(from,wt=nb_toots,name="nb_toots_total") |>
+  mutate(paverage=nb_toots_total/Nblink) |>
+  arrange(desc(paverage)) 
+# Keep the links whose share exceeds 5 times the overall share of their
+# source and that count more than 10 toots.
+Links.filtered = Links.probs |> left_join(Links.probs.average,by="from") |>
+  mutate(logratio=log(p/paverage)) |>
+  filter(logratio>log(5),nb_toots>10)
+
+write_csv(Links.filtered,"Links_filtered.csv")
+
+
+# Individual accounts, queried through rtoot.
+library(rtoot)
+
+
+
+token = auth_setup("mapstodon.space", "user")
+
+directory = get_instance_directory("mapstodon.space",local = TRUE,limit=500)
+attr(directory,"headers")
+
+folowers = get_account_followers("109302019656613226",token=token,limit=200)
+attr(folowers,"headers")
+
+folowing = get_account_following("109302019656613226",token=token,limit=200)
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/8D711DAA
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/8D711DAA
@ -0,0 +1,27 @@
+{
+    "id": "8D711DAA",
+    "path": "~/Projets/mapstodon/scrPlot.R",
+    "project_path": "scrPlot.R",
+    "type": "r_source",
+    "hash": "0",
+    "contents": "",
+    "dirty": false,
+    "created": 1668002263018.0,
+    "source_on_save": false,
+    "relative_order": 2,
+    "properties": {
+        "tempName": "Untitled1",
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "25,83",
+        "scrollLine": "0"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1668007729,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1668007729854,
+    "read_only": false,
+    "read_only_alternatives": []
+}
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/8D711DAA-contents
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/8D711DAA-contents
@ -0,0 +1,33 @@
+graph=jsonlite::fromJSON("graph.json")
+
+
+class(graph$nodes)
+# First map: nodes sized by `nansi-indegree`, coloured by `nansi-louvain`.
+library(ggplot2)
+ggplot(graph$nodes)+geom_point(aes(x=attributes$x,y=attributes$y,size=attributes$`nansi-indegree`,color=factor(attributes$`nansi-louvain`)))+
+  scale_size_area("",guide="none",max_size = 3)+
+  scale_color_brewer(palette="Set3",guide="none")+
+  theme_void()
+
+# Top 3 nodes by `nansi-indegree` in each `nansi-louvain` group.
+library(dplyr)
+
+graph$nodes |> group_by(attributes$`nansi-louvain`) |> top_n(3,attributes$`nansi-indegree`) |>
+  transmute(indeg=attributes$`nansi-indegree`,com=attributes$`nansi-louvain`,lab= attributes$`label`) |> arrange(com) |> View()
+
+# Node of mapstodon.space, and labels of the nodes with indeg >= 20.
+mapstodon = graph$nodes |> filter(attributes$label=="mapstodon.space")
+labels = graph$nodes |> 
+  transmute(label=attributes$label,x=attributes$x,y=attributes$y,indeg=attributes$`nansi-indegree`) |> 
+  arrange(desc(indeg)) |> 
+  filter(indeg>=20,!is.na(label))
+# Second map: same nodes, mapstodon.space circled in red, labels added.
+ggplot(graph$nodes)+geom_point(aes(x=attributes$x,y=attributes$y,size=attributes$`nansi-indegree`,color=factor(attributes$`nansi-louvain`)))+
+  geom_point(data=mapstodon,aes(x=attributes$x,y=attributes$y,size=5*attributes$`nansi-indegree`),shape=1,color="red")+
+  geom_text(data=labels,aes(x=x,y=y,label=label),size=1.5)+
+  scale_size_area("",guide="none",max_size = 3)+
+  scale_color_brewer(palette="Set3",guide="none")+
+  theme_void()
+
+
+
--- a/.Rproj.user/D08F4A2A/sources/session-28C952D4/lock_file
+++ b/.Rproj.user/D08F4A2A/sources/session-28C952D4/lock_file
--- a/.Rproj.user/shared/notebooks/patch-chunk-names
+++ b/.Rproj.user/shared/notebooks/patch-chunk-names
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@ -0,0 +1,2 @@
+/home/comeetie/Projets/mapstodon/scrData.R="812016DC"
+/home/comeetie/Projets/mapstodon/scrPlot.R="54314C63"
--- a/Langs_raw.csv
+++ b/Langs_raw.csv
--- a/Links_filtered.csv
+++ b/Links_filtered.csv
--- a/Links_raw.csv
+++ b/Links_raw.csv
--- a/Network(1).gexf
+++ b/Network(1).gexf
--- a/Network(2).gexf
+++ b/Network(2).gexf
--- a/Network(3).gexf
+++ b/Network(3).gexf
--- a/first_try.pdf
+++ b/first_try.pdf
--- a/graph.json
+++ b/graph.json
--- a/graph.png
+++ b/graph.png
--- a/graph.svg
+++ b/graph.svg
--- a/mapstodon.Rproj
+++ b/mapstodon.Rproj
@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
--- a/scrData.R
+++ b/scrData.R
@ -0,0 +1,144 @@
+# Crawl the public timelines of mapstodon.space and of its peers, count where
+# the toots come from (instance and language) and export the link tables.
+library(httr)
+library(dplyr)
+library(stringr)
+library(readr)
+library(rtoot)
+
+# Crawl settings ----
+page_size <- 25 # toots requested per page
+max_pages <- 20 # pages requested per instance
+min_toots <- 400 # an instance needs more toots than this to be kept
+request_timeout <- 10 # seconds before a request is abandoned
+
+peers <- jsonlite::fromJSON("https://mapstodon.space/api/v1/instance/peers")
+peers <- c(peers, "mapstodon.space")
+Links <- list()
+Lang <- list()
+for (peer in peers) {
+  ur <- paste0("https://", peer, "/api/v1/timelines/public?limit=", page_size)
+  next_url <- ur
+  instances <- character()
+  languages <- character()
+  for (pages in seq_len(max_pages)) {
+    # Cursor of the next page, or NA when paging has to stop.
+    max_id <- tryCatch(
+      {
+        response <- GET(next_url, timeout(request_timeout))
+        if (http_error(response)) {
+          stop("HTTP status ", status_code(response), call. = FALSE)
+        }
+        toots <- jsonlite::fromJSON(
+          content(response, "text", encoding = "UTF-8")
+        )
+        # Only count pages that parsed to a data frame of toots.
+        if (is.data.frame(toots)) {
+          instances <- c(
+            instances,
+            toots$account$url |> str_remove("/@.*") |> str_remove("https://")
+          )
+          languages <- c(languages, toots$language)
+        }
+        link <- headers(response)$link
+        if (is.null(link)) {
+          NA_character_
+        } else {
+          str_extract(link, "max_id=[0123456789]+") |> str_remove("max_id=")
+        }
+      },
+      error = function(e) {
+        message("Stopping crawl of ", peer, ": ", conditionMessage(e))
+        NA_character_
+      }
+    )
+    # Without a cursor the next URL would end in "&max_id=" or "&max_id=NA":
+    # stop paging instead of sending requests that cannot advance.
+    if (length(max_id) != 1L || is.na(max_id)) {
+      break
+    }
+    next_url <- paste0(ur, "&max_id=", max_id)
+    print(next_url)
+    Sys.sleep(0.2)
+  }
+  # An instance without any collected toot gets no entry at all.
+  peers_links <- table(instances)
+  if (length(peers_links) > 0) {
+    Links[[peer]] <- data.frame(
+      instance = peer,
+      from = names(peers_links),
+      nb_toots = as.numeric(peers_links)
+    )
+    peers_langs <- table(languages)
+    if (length(peers_langs) > 0) {
+      Lang[[peer]] <- data.frame(
+        instance = peer,
+        from = names(peers_langs),
+        nb_toots = as.numeric(peers_langs)
+      )
+    }
+  }
+}
+
+# Keep the instances that returned enough toots ----
+seems_ok <- vapply(Links, \(df) sum(df$nb_toots) > min_toots, logical(1))
+message("Share of peers kept: ", sum(seems_ok) / length(peers))
+Links_clean <- Links[seems_ok]
+# Select languages by name: `Lang` can lack peers that `Links` has, so the
+# positional mask `seems_ok` could pick the wrong entries.
+Lang_clean <- Lang[intersect(names(Links_clean), names(Lang))]
+if (length(Links_clean) == 0) {
+  stop("No instance returned more than ", min_toots, " toots.", call. = FALSE)
+}
+
+Links.df <- bind_rows(Links_clean)
+Lang.df <- bind_rows(Lang_clean)
+
+write_csv(Links.df, "Links_raw.csv")
+write_csv(Lang.df, "Langs_raw.csv")
+
+# Some cleaning ----
+# Drop whatever follows a "/" in the source name and keep the links whose
+# source is itself one of the kept instances.
+instances <- unique(Links.df$instance)
+Links.cl <- Links.df |>
+  mutate(from = str_remove(from, "/.*")) |>
+  group_by(instance, from) |>
+  summarise(nb_toots = sum(nb_toots), .groups = "drop") |>
+  filter(from %in% instances)
+
+# Share of each source within the toots kept for an instance.
+Links.probs <- Links.cl |>
+  add_count(instance, wt = nb_toots) |>
+  mutate(p = nb_toots / n) |>
+  arrange(desc(p))
+
+# Share of each source over all the kept links.
+Nblink <- sum(Links.cl$nb_toots)
+Links.probs.average <- Links.cl |>
+  count(from, wt = nb_toots, name = "nb_toots_total") |>
+  mutate(paverage = nb_toots_total / Nblink) |>
+  arrange(desc(paverage))
+
+# Keep the links whose share exceeds 5 times the overall share of their
+# source and that count more than 10 toots.
+Links.filtered <- Links.probs |>
+  left_join(Links.probs.average, by = "from") |>
+  mutate(logratio = log(p / paverage)) |>
+  filter(logratio > log(5), nb_toots > 10)
+
+write_csv(Links.filtered, "Links_filtered.csv")
+
+# Individuals ----
+token <- auth_setup("mapstodon.space", "user")
+
+folowers <- get_account_followers(
+  "109302019656613226",
+  token = token, limit = 200
+)
+attr(folowers, "headers")
+
+folowing <- get_account_following(
+  "109302019656613226",
+  token = token, limit = 200
+)
--- a/scrPlot.R
+++ b/scrPlot.R
@ -0,0 +1,71 @@
+# Draw the instance graph stored in graph.json and label its main nodes.
+library(ggplot2)
+library(dplyr)
+
+graph <- jsonlite::fromJSON("graph.json")
+# The node attributes are read below from the `attributes` column of the nodes.
+stopifnot(is.data.frame(graph$nodes))
+
+# Scales and theme shared by both maps.
+map_style <- list(
+  scale_size_area("", guide = "none", max_size = 3),
+  scale_color_brewer(palette = "Set3", guide = "none"),
+  theme_void()
+)
+
+# One point per node: size is `nansi-indegree`, colour is `nansi-louvain`.
+nodes_plot <- ggplot(graph$nodes) +
+  geom_point(aes(
+    x = attributes$x,
+    y = attributes$y,
+    size = attributes$`nansi-indegree`,
+    color = factor(attributes$`nansi-louvain`)
+  ))
+# ggplot objects are only drawn by auto-printing, which source() does not do
+# by default, so print them explicitly.
+print(nodes_plot + map_style)
+
+# Top 3 nodes by `nansi-indegree` in each `nansi-louvain` group.
+top_nodes <- graph$nodes |>
+  group_by(attributes$`nansi-louvain`) |>
+  top_n(3, attributes$`nansi-indegree`) |>
+  transmute(
+    indeg = attributes$`nansi-indegree`,
+    com = attributes$`nansi-louvain`,
+    lab = attributes$`label`
+  ) |>
+  arrange(com)
+# View() opens a data viewer, which only helps interactively; print otherwise.
+if (interactive()) {
+  View(top_nodes)
+} else {
+  print(top_nodes)
+}
+
+mapstodon <- graph$nodes |>
+  filter(attributes$label == "mapstodon.space")
+# Labels are kept for the nodes with an in-degree of at least 20.
+labels <- graph$nodes |>
+  transmute(
+    label = attributes$label,
+    x = attributes$x,
+    y = attributes$y,
+    indeg = attributes$`nansi-indegree`
+  ) |>
+  arrange(desc(indeg)) |>
+  filter(indeg >= 20, !is.na(label))
+
+# Same map with mapstodon.space circled in red and the main nodes labelled.
+labelled_plot <- nodes_plot +
+  geom_point(
+    data = mapstodon,
+    aes(
+      x = attributes$x,
+      y = attributes$y,
+      size = 5 * attributes$`nansi-indegree`
+    ),
+    shape = 1, color = "red"
+  ) +
+  geom_text(data = labels, aes(x = x, y = y, label = label), size = 1.5) +
+  map_style
+print(labelled_plot)
--- a/second_try.pdf
+++ b/second_try.pdf