pull/5/head
cblgh 2021-10-16 22:04:18 +02:00 zatwierdzone przez Alexander Cobleigh
rodzic 34b4978895
commit 793a9867cc
3 zmienionych plików z 94 dodań i 2 usunięć

Wyświetl plik

@ -69,6 +69,7 @@ func createTables(db *sql.DB) {
url TEXT NOT NULL,
FOREIGN KEY(url) REFERENCES pages(url)
)`,
`CREATE VIRTUAL TABLE IF NOT EXISTS external_links USING fts5 (url, tokenize="trigram")`,
}
for _, query := range queries {
@ -98,6 +99,29 @@ func SearchWordsByCount(db *sql.DB, words []string) []types.PageData {
return searchWords(db, words, false)
}
// FulltextSearchWords runs an FTS5 MATCH query for phrase against the url
// column of the external_links table and returns up to 30 distinct matching
// URLs in random order. Each result's Title is set to its URL.
//
// An empty phrase returns nil without touching the database: there is nothing
// to match, and it avoids handing FTS5 an empty query expression.
func FulltextSearchWords(db *sql.DB, phrase string) []types.PageData {
	if phrase == "" {
		return nil
	}

	// The statement is static; the phrase is always passed as a bound parameter.
	const query = `SELECT url from external_links WHERE url MATCH ? GROUP BY url ORDER BY RANDOM() LIMIT 30`
	stmt, err := db.Prepare(query)
	util.Check(err)
	defer stmt.Close()

	rows, err := stmt.Query(phrase)
	util.Check(err)
	defer rows.Close()

	var pages []types.PageData
	for rows.Next() {
		var pageData types.PageData
		if err := rows.Scan(&pageData.URL); err != nil {
			log.Fatalln(err)
		}
		// External links carry no stored title, so the URL doubles as one.
		pageData.Title = pageData.URL
		pages = append(pages, pageData)
	}
	// rows.Next returns false both on exhaustion and on failure; surface the latter.
	util.Check(rows.Err())
	return pages
}
// GetDomainCount returns the result of countQuery for "domains".
func GetDomainCount(db *sql.DB) int {
	total := countQuery(db, "domains")
	return total
}
@ -123,6 +147,19 @@ func GetRandomDomain(db *sql.DB) string {
return domain
}
// GetRandomExternalLink returns the url of one randomly chosen row of the
// external_links table, or the empty string when the query yields no rows.
// Query, scan and iteration errors are all passed to util.Check.
func GetRandomExternalLink(db *sql.DB) string {
	rows, err := db.Query("SELECT url FROM external_links ORDER BY RANDOM() LIMIT 1;")
	util.Check(err)
	defer rows.Close()

	var link string
	for rows.Next() {
		util.Check(rows.Scan(&link))
	}
	// Distinguish "no rows" from an iteration failure, which Next reports as false too.
	util.Check(rows.Err())
	return link
}
func GetRandomPage(db *sql.DB) string {
domain := GetRandomDomain(db)
stmt, err := db.Prepare("SELECT url FROM pages WHERE domain = ? ORDER BY RANDOM() LIMIT 1;")
@ -242,3 +279,17 @@ func InsertManyWords(db *sql.DB, batch []types.SearchFragment) {
_, err := db.Exec(stmt, args...)
util.Check(err)
}
// InsertManyExternalLinks inserts every entry of externalLinks into the url
// column of the external_links table using multi-row INSERT OR IGNORE
// statements. Any execution error is passed to util.Check.
//
// The links are written in chunks, one statement per chunk, so that a large
// batch cannot exceed SQLite's limit on bound parameters per statement. An
// empty or nil slice issues no statement at all; previously it produced the
// invalid SQL "... VALUES " with no value list.
func InsertManyExternalLinks(db *sql.DB, externalLinks []string) {
	// SQLite builds older than 3.32.0 default to at most 999 bound parameters
	// per statement; stay comfortably below that.
	const maxLinksPerStatement = 500

	for start := 0; start < len(externalLinks); start += maxLinksPerStatement {
		end := start + maxLinksPerStatement
		if end > len(externalLinks) {
			end = len(externalLinks)
		}
		chunk := externalLinks[start:end]

		values := make([]string, 0, len(chunk))
		args := make([]interface{}, 0, len(chunk))
		for _, externalLink := range chunk {
			values = append(values, "(?)")
			args = append(args, externalLink)
		}
		stmt := fmt.Sprintf(`INSERT OR IGNORE INTO external_links(url) VALUES %s`, strings.Join(values, ","))
		_, err := db.Exec(stmt, args...)
		util.Check(err)
	}
}

Wyświetl plik

@ -80,6 +80,7 @@ func Ingest(config types.Config) {
var count int
var batchsize = 100
batch := make([]types.SearchFragment, 0, 0)
var externalLinks []string
scanner := bufio.NewScanner(buf)
for scanner.Scan() {
@ -141,6 +142,8 @@ func Ingest(config types.Config) {
page.Lang = rawdata
case "keywords":
processed = strings.Split(strings.ReplaceAll(payload, ", ", ","), ",")
case "non-webring-link":
externalLinks = append(externalLinks, payload)
default:
continue
}
@ -162,7 +165,8 @@ func Ingest(config types.Config) {
}
if len(pages) > batchsize {
ingestBatch(db, batch, pages)
ingestBatch(db, batch, pages, externalLinks)
externalLinks = make([]string, 0, 0)
batch = make([]types.SearchFragment, 0, 0)
// TODO: make sure we don't partially insert any page data
pages = make(map[string]types.PageData)
@ -174,7 +178,7 @@ func Ingest(config types.Config) {
util.Check(err)
}
func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]types.PageData) {
func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]types.PageData, links []string) {
pages := make([]types.PageData, len(pageMap))
i := 0
for k := range pageMap {
@ -185,6 +189,7 @@ func ingestBatch(db *sql.DB, batch []types.SearchFragment, pageMap map[string]ty
database.InsertManyDomains(db, pages)
database.InsertManyPages(db, pages)
database.InsertManyWords(db, batch)
database.InsertManyExternalLinks(db, links)
log.Println("finished ingesting batch")
}

Wyświetl plik

@ -89,6 +89,35 @@ func (h RequestHandler) searchRoute(res http.ResponseWriter, req *http.Request)
h.renderView(res, "search", view)
}
// externalSearchRoute serves full-text search over external links. For GET
// requests the first "q" query parameter is used as the search phrase; for
// any other method the phrase stays empty. The matches returned by
// database.FulltextSearchWords are rendered with the "search" view.
func (h RequestHandler) externalSearchRoute(res http.ResponseWriter, req *http.Request) {
	query := ""
	if req.Method == http.MethodGet {
		// Get yields the first value for the key, or "" when it is absent.
		query = req.URL.Query().Get("q")
	}

	results := database.FulltextSearchWords(h.db, query)
	if useURLTitles {
		for idx := range results {
			// Show the URL without its scheme and with percent-escapes decoded.
			bare := strings.TrimPrefix(results[idx].URL, "http://")
			bare = strings.TrimPrefix(bare, "https://")
			prettyURL, err := url.QueryUnescape(bare)
			util.Check(err)
			results[idx].Title = prettyURL
		}
	}

	view := &TemplateView{}
	view.Data = SearchData{
		Query: query,
		Pages: results,
	}
	h.renderView(res, "search", view)
}
func (h RequestHandler) aboutRoute(res http.ResponseWriter, req *http.Request) {
view := &TemplateView{}
@ -133,6 +162,11 @@ func (h RequestHandler) randomRoute(res http.ResponseWriter, req *http.Request)
http.Redirect(res, req, link, http.StatusSeeOther)
}
// randomExternalRoute answers with a 303 See Other redirect to whatever
// database.GetRandomExternalLink returns for the handler's database.
func (h RequestHandler) randomExternalRoute(res http.ResponseWriter, req *http.Request) {
	http.Redirect(res, req, database.GetRandomExternalLink(h.db), http.StatusSeeOther)
}
// webringRoute answers with a 303 See Other redirect to the URL stored in
// h.config.General.URL.
func (h RequestHandler) webringRoute(res http.ResponseWriter, req *http.Request) {
	target := h.config.General.URL
	http.Redirect(res, req, target, http.StatusSeeOther)
}
@ -157,6 +191,8 @@ func Serve(config types.Config) {
http.HandleFunc("/about", handler.aboutRoute)
http.HandleFunc("/", handler.searchRoute)
http.HandleFunc("/external", handler.externalSearchRoute)
http.HandleFunc("/random/external", handler.randomExternalRoute)
http.HandleFunc("/random", handler.randomRoute)
http.HandleFunc("/webring", handler.webringRoute)
http.HandleFunc("/filtered", handler.filteredRoute)