clx-browser

[ACTIVE] a smol browser based off of circumflex
git clone git://git.figbert.com/clx-browser.git
Log | Files | Refs | README | LICENSE

commit 75a224627c528b830a5cf92be938214db98a2760
parent 6e7af3f2eea3c3fce7bd7439e00c9d1217075e26
Author: FIGBERT <figbert@figbert.com>
Date:   Sat, 30 Jul 2022 13:16:06 -0700

Clean URLs of excess query components

Diffstat:
M go.mod    |  3 ++-
M go.sum    |  2 ++
M reader.go | 30 ++++++++++++++++++++++++++++++
3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
@@ -4,13 +4,14 @@ go 1.18
 require (
 	github.com/JohannesKaufmann/html-to-markdown v1.3.5
+	github.com/PuerkitoBio/goquery v1.8.0
 	github.com/charmbracelet/glamour v0.5.0
 	github.com/charmbracelet/lipgloss v0.5.0
 	github.com/go-shiori/go-readability v0.0.0-20220215145315-dd6828d2f09b
+	github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e
 )
 require (
-	github.com/PuerkitoBio/goquery v1.8.0 // indirect
 	github.com/alecthomas/chroma v0.10.0 // indirect
 	github.com/andybalholm/cascadia v1.3.1 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
diff --git a/go.sum b/go.sum
@@ -72,6 +72,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e h1:OBbESkCZyg5JmX+cOu90az48Eiw0n5vB5IcwIsv6G6M=
+github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e/go.mod h1:MNjZeAQSFsNlu7wGmb3uIzR99mMVR/JUmr4IaDvh4z8=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
diff --git a/reader.go b/reader.go
@@ -1,14 +1,20 @@
 package main
 import (
+	"fmt"
 	"net/http"
 	"net/url"
+	"strings"
 	"time"
 	md "github.com/JohannesKaufmann/html-to-markdown"
 	"github.com/JohannesKaufmann/html-to-markdown/plugin"
 	"github.com/go-shiori/go-readability"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"github.com/kozmos/clean-url"
 )
 func getArticle(URL string) (string, string) {
@@ -29,8 +35,32 @@ func getArticle(URL string) (string, string) {
 		panic(err)
 	}
+	href := md.Rule{
+		Filter: []string{"a"},
+		Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
+			href, ok := selec.Attr("href")
+			if !ok {
+				return md.String(strings.TrimSpace(content))
+			}
+
+			parsedURL, err := url.Parse(href)
+			if err != nil {
+				panic(err)
+			}
+
+			return md.String(
+				fmt.Sprintf(
+					"[%s](%s://%s)",
+					strings.TrimSpace(content), parsedURL.Scheme,
+					cleanurl.Clean(opt.GetAbsoluteURL(selec, href, pageURL.Host)),
+				),
+			)
+		},
+	}
+
 	opt := &md.Options{}
 	converter := md.NewConverter(URL, true, opt)
+	converter.AddRules(href)
 	converter.Use(plugin.Table())
 	markdown, err := converter.ConvertString(art.Content)
 	if err != nil {