reader.go (1396B)
1 package main 2 3 import ( 4 "fmt" 5 "net/http" 6 "net/url" 7 "strings" 8 "time" 9 10 md "github.com/JohannesKaufmann/html-to-markdown" 11 "github.com/JohannesKaufmann/html-to-markdown/plugin" 12 13 "github.com/go-shiori/go-readability" 14 15 "github.com/PuerkitoBio/goquery" 16 17 "github.com/kozmos/clean-url" 18 ) 19 20 func getArticle(URL string) (string, string) { 21 client := http.Client{Timeout: 5 * time.Second} 22 response, err := client.Get(URL) 23 if err != nil { 24 panic(err) 25 } 26 defer response.Body.Close() 27 28 pageURL, err := url.Parse(URL) 29 if err != nil { 30 panic(err) 31 } 32 33 art, err := readability.FromReader(response.Body, pageURL) 34 if err != nil { 35 panic(err) 36 } 37 38 href := md.Rule{ 39 Filter: []string{"a"}, 40 Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string { 41 href, ok := selec.Attr("href") 42 if !ok { 43 return md.String(strings.TrimSpace(content)) 44 } 45 46 parsedURL, err := url.Parse(href) 47 if err != nil { 48 panic(err) 49 } 50 51 return md.String( 52 fmt.Sprintf( 53 "[%s](%s://%s)", 54 strings.TrimSpace(content), parsedURL.Scheme, 55 cleanurl.Clean(opt.GetAbsoluteURL(selec, href, pageURL.Host)), 56 ), 57 ) 58 }, 59 } 60 61 opt := &md.Options{} 62 converter := md.NewConverter(URL, true, opt) 63 converter.AddRules(href) 64 converter.Use(plugin.Table()) 65 markdown, err := converter.ConvertString(art.Content) 66 if err != nil { 67 panic(err) 68 } 69 70 return art.Title, markdown 71 }