commit 75a224627c528b830a5cf92be938214db98a2760
parent 6e7af3f2eea3c3fce7bd7439e00c9d1217075e26
Author: FIGBERT <figbert@figbert.com>
Date: Sat, 30 Jul 2022 13:16:06 -0700
Clean URLs of excess query components
Diffstat:
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/go.mod b/go.mod
@@ -4,13 +4,14 @@ go 1.18
require (
github.com/JohannesKaufmann/html-to-markdown v1.3.5
+ github.com/PuerkitoBio/goquery v1.8.0
github.com/charmbracelet/glamour v0.5.0
github.com/charmbracelet/lipgloss v0.5.0
github.com/go-shiori/go-readability v0.0.0-20220215145315-dd6828d2f09b
+ github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e
)
require (
- github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
diff --git a/go.sum b/go.sum
@@ -72,6 +72,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e h1:OBbESkCZyg5JmX+cOu90az48Eiw0n5vB5IcwIsv6G6M=
+github.com/kozmos/clean-url v0.0.0-20171106101704-be7f9d853a5e/go.mod h1:MNjZeAQSFsNlu7wGmb3uIzR99mMVR/JUmr4IaDvh4z8=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
diff --git a/reader.go b/reader.go
@@ -1,14 +1,20 @@
package main
import (
+ "fmt"
"net/http"
"net/url"
+ "strings"
"time"
md "github.com/JohannesKaufmann/html-to-markdown"
"github.com/JohannesKaufmann/html-to-markdown/plugin"
"github.com/go-shiori/go-readability"
+
+ "github.com/PuerkitoBio/goquery"
+
+ "github.com/kozmos/clean-url"
)
func getArticle(URL string) (string, string) {
@@ -29,8 +35,32 @@ func getArticle(URL string) (string, string) {
panic(err)
}
+ // href renders <a> as a markdown link to the cleaned absolute URL. cleanurl.Clean
+ // presumably drops the scheme (hence it is re-attached below) — TODO confirm.
+ href := md.Rule{
+ 	Filter: []string{"a"},
+ 	Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
+ 		text := strings.TrimSpace(content)
+ 		raw, ok := selec.Attr("href")
+ 		if !ok {
+ 			return md.String(text) // anchor without a target: keep only its text
+ 		}
+
+ 		abs := opt.GetAbsoluteURL(selec, raw, pageURL.Host)
+ 		parsedURL, err := url.Parse(abs)
+ 		if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
+ 			// Malformed or host-less (e.g. mailto:) target: link to it untouched, don't panic.
+ 			return md.String(fmt.Sprintf("[%s](%s)", text, abs))
+ 		}
+
+ 		// Take the scheme from the resolved URL; a relative href has none of its own.
+ 		return md.String(fmt.Sprintf("[%s](%s://%s)", text, parsedURL.Scheme, cleanurl.Clean(abs)))
+ 	},
+ }
+
opt := &md.Options{}
converter := md.NewConverter(URL, true, opt)
+ converter.AddRules(href)
converter.Use(plugin.Table())
markdown, err := converter.ConvertString(art.Content)
if err != nil {