Skip to content

Commit

Permalink
Merge pull request #10 from hs3city/session-10
Browse files Browse the repository at this point in the history
Session 10
  • Loading branch information
szmktk authored Feb 8, 2024
2 parents 0c9cdd3 + 70f9d43 commit edd3332
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 43 deletions.
2 changes: 2 additions & 0 deletions go.work.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
97 changes: 76 additions & 21 deletions link/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package main

import (
"bytes"
"flag"
"fmt"
"io"
"log"
"os"
"strings"

"golang.org/x/net/html"
)
Expand All @@ -13,40 +17,91 @@ type Link struct {
Text string
}

func catch[T any](val T, err error) T {
if err != nil {
fmt.Println(err)
os.Exit(1)
}
return val
// String renders the link as "{href='<Href>', text='<Text>'}", which is the
// form used whenever a Link is printed through the fmt or log packages.
func (l Link) String() string {
	const layout = "{href='%s', text='%s'}"
	return fmt.Sprintf(layout, l.Href, l.Text)
}

func main() {
inputFile := readFile(*parseUserInput())
defer inputFile.Close()
links := parseLinks(*inputFile)
log.Println(links)
}

// parseUserInput registers the -file flag (default "ex2.html"), parses the
// command line and returns a pointer to the resulting path.
//
// If the flag value is an empty string, the usage text is printed and the
// process terminates through log.Fatalln.
func parseUserInput() *string {
htmlFilePath := flag.String("file", "ex2.html", "Path to the HTML file")
flag.Parse()

// The default is non-empty, so this only triggers when the flag is
// explicitly set to an empty value.
if *htmlFilePath == "" {
flag.Usage()
log.Fatalln("Error: HTML file path is required.")
}

// NOTE(review): the next statement looks like a line from the pre-change
// version that the diff rendering left inside this function; its result is
// not used here. Confirm it is not part of the current file.
content := catch(os.ReadFile("ex3.html"))
return htmlFilePath
}

reader := bytes.NewReader(content)
tokenizer := html.NewTokenizer(reader)
// readFile opens the file at path for reading and returns the open handle;
// closing it is left to the caller. If the file cannot be opened, the failure
// is logged and the process exits through log.Fatalf.
func readFile(path string) *os.File {
	handle, openErr := os.Open(path)
	if openErr == nil {
		return handle
	}

	log.Fatalf("Error reading HTML file: '%s': %v", path, openErr)
	return nil // not reached: log.Fatalf terminates the process
}

// parseLinks tokenizes the HTML in file and returns one Link per closing
// </a> tag encountered.
//
// For each start tag named "a" that carries attributes, the value of its href
// attribute is stored in link.Href and text capture is switched on. While
// capture is on, the raw bytes of every text token are accumulated in buffer.
// On a closing </a> the trimmed buffer becomes link.Text, the link is
// appended to the result, and the buffer and capture flag are reset.
// The loop ends when processErrorToken reports a non-nil error.
//
// NOTE(review): this span appears to interleave lines of the previous
// implementation with the current one (a rendered diff without +/- markers);
// the statements flagged below look like leftovers. Confirm against the
// real file.
func parseLinks(file os.File) []Link {
tokenizer := html.NewTokenizer(&file)
var links []Link
var buffer bytes.Buffer
// we can use string here as well
// var text string
var catchText bool
// NOTE(review): link is declared once and never cleared between anchors, and
// the </a> branch appends regardless of catchText. It looks like the closing
// tag of an anchor without href would append a Link carrying the previous
// Href. Verify and reset link after appending if so.
var link Link

// var links []string
for {
// NOTE(review): the next two statements (a second Next call and an if that
// is never closed) look like pre-change leftovers.
t := tokenizer.Next()
if t == html.ErrorToken {
//fmt.Println("Error token")
// fmt.Println(t)
tokenType := tokenizer.Next()
// A non-nil result means the tokenizer has hit an error token; stop
// scanning.
err := processErrorToken(tokenizer, tokenType)
if err != nil {
break
}

// fmt.Println(z.Token().Attr)
// NOTE(review): the following five statements (printing href values
// directly) also look like pre-change leftovers.
token := tokenizer.Token()
if token.Data == "a" && len(token.Attr) > 0 {
for _, attr := range token.Attr {
if attr.Key == "href" {
fmt.Println(attr.Val)
if tokenType == html.StartTagToken {
token := tokenizer.Token()
if token.DataAtom.String() == "a" && len(token.Attr) > 0 {
for _, attr := range token.Attr {
if attr.Key == "href" {
// Remember the target and start collecting the anchor's text.
link.Href = attr.Val
catchText = true
}
}
}
} else if tokenType == html.TextToken {
if catchText {
// NOTE(review): Raw() is used rather than Text(); presumably this
// leaves HTML entities escaped in the captured text. Confirm against
// the x/net/html documentation.
buffer.Write(tokenizer.Raw())
// text += string(tokenizer.Raw())
}
} else if tokenType == html.EndTagToken {
token := tokenizer.Token()
if token.DataAtom.String() == "a" {
// Finish the current link: trimmed text, append, then reset state.
link.Text = strings.TrimSpace(buffer.String())
links = append(links, link)
buffer.Reset()
// text = ""
catchText = false
}
}

}

return links
}

// processErrorToken tells the caller whether tokenization has ended.
//
// For any token type other than html.ErrorToken it returns nil. For an error
// token it fetches the tokenizer's error: io.EOF is returned to the caller,
// while any other value is logged through log.Fatalln, which terminates the
// process.
func processErrorToken(tokenizer *html.Tokenizer, tokenType html.TokenType) error {
	if tokenType != html.ErrorToken {
		return nil
	}

	cause := tokenizer.Err()
	if cause == io.EOF {
		return cause
	}

	log.Fatalln("Error when parsing HTML", cause)
	return cause // not reached: log.Fatalln terminates the process
}
22 changes: 0 additions & 22 deletions link/main_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package main

import (
"reflect"
"testing"
)

Expand All @@ -17,24 +16,3 @@ func Test_main(t *testing.T) {
})
}
}

// Test_catch is a table-driven test for catch: each case supplies a value and
// an error and states the value catch is expected to return.
// NOTE(review): the table has no cases yet, so no subtest ever runs.
func Test_catch(t *testing.T) {
	type args struct {
		val []byte
		err error
	}
	type testCase struct {
		name string
		args args
		want []byte
	}

	cases := []testCase{
		// TODO: Add test cases.
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := catch(tc.args.val, tc.args.err)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("catch() = %v, want %v", got, tc.want)
		})
	}
}

0 comments on commit edd3332

Please sign in to comment.