From 333211d4d0332586372def291c85940d49d7492a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Erbsh=C3=A4u=C3=9Fer?=
Date: Sun, 24 May 2026 09:22:16 +0200
Subject: [PATCH] add parser for blog articles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tobias Erbshäußer
---
 backend/blog.go          | 189 +++++++++++++++++++++++++++++++++++++++
 backend/go.work          |   7 ++
 backend/md/parser.go     |  62 +++++++++++++
 backend/md/properties.go | 128 ++++++++++++++++++++++++++
 backend/md/urls.go       | 142 +++++++++++++++++++++++++++++
 5 files changed, 528 insertions(+)
 create mode 100644 backend/blog.go
 create mode 100644 backend/go.work
 create mode 100644 backend/md/parser.go
 create mode 100644 backend/md/properties.go
 create mode 100644 backend/md/urls.go

diff --git a/backend/blog.go b/backend/blog.go
new file mode 100644
index 0000000..909dcbc
--- /dev/null
+++ b/backend/blog.go
@@ -0,0 +1,189 @@
+package main
+
+// NOTE(review): encoding/json, log and net/http are not referenced in this
+// file yet, and the Go compiler rejects unused imports. Either land the code
+// that uses them together with this file or drop them.
+import (
+	"archive/tar"
+	"backend/md"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// ArticleStatus is the publication state of an article.
+type ArticleStatus = int
+
+// Publication states of an article.
+const (
+	ArticleStatusDraft ArticleStatus = iota
+	ArticleStatusPublished
+	ArticleStatusOffline
+)
+
+// ArticleProperties is the metadata of an article.
+type ArticleProperties struct {
+	Id               int64         `json:"id"`
+	Title            string        `json:"title"`
+	Status           ArticleStatus `json:"status"`
+	Tags             []string      `json:"tags"`
+	ReleaseDate      time.Time     `json:"date"`
+	ModificationDate *time.Time    `json:"mod-date"`
+}
+
+// Article is a parsed article: its metadata, the rendered HTML and the files
+// referenced from the markdown source.
+type Article struct {
+	ArticleProperties
+	Content string
+	Files   []ArticleFile
+}
+
+// ArticleFile is a file from the article archive that the markdown refers to.
+// Id is the number appended to the file prefix in the rendered URL.
+type ArticleFile struct {
+	Id   int
+	Data []byte
+}
+
+// ParseArticle reads an article from a tar archive.
+//
+// The archive must contain a README.md holding the markdown source, which in
+// turn must define the "title" and "date" properties; "tags" (comma separated)
+// and "mod-date" are optional. Dates use the YYYY-MM-DD layout.
+//
+// Every link or image in the markdown that points to a file of the archive is
+// rewritten to filePrefix followed by the Id of the matching entry in
+// Article.Files. A file referenced several times is stored once. The returned
+// article has Id -1 and status ArticleStatusDraft.
+func ParseArticle(reader io.Reader, filePrefix string) (*Article, error) {
+	tarFiles, err := readTarFiles(reader)
+	if err != nil {
+		return nil, err
+	}
+
+	readmeBytes, found := tarFiles["README.md"]
+	if !found {
+		return nil, errors.New("README.md not found")
+	}
+
+	files := make([]ArticleFile, 0)
+	fileIDs := make(map[string]int)
+
+	pc, html, err := md.Parse(func(path string) (string, error) {
+		name := strings.TrimPrefix(path, "./")
+
+		id, known := fileIDs[name]
+		if !known {
+			content, ok := tarFiles[name]
+			if !ok {
+				return "", errors.New("file '" + path + "' not found")
+			}
+
+			id = len(files)
+			fileIDs[name] = id
+			files = append(files, ArticleFile{Id: id, Data: content})
+		}
+
+		// The URL must carry the id stored in the ArticleFile, not the next
+		// free one, otherwise the rendered HTML points at the wrong file.
+		return filePrefix + strconv.Itoa(id), nil
+	}, readmeBytes)
+	if err != nil {
+		return nil, err
+	}
+
+	title, err := md.GetProperty(pc, "title")
+	if err != nil {
+		if errors.Is(err, md.PropertyNotFoundError) {
+			return nil, errors.New("title property not found")
+		}
+
+		return nil, err
+	}
+
+	tagsStr, err := md.GetProperty(pc, "tags")
+	if err != nil && !errors.Is(err, md.PropertyNotFoundError) {
+		return nil, err
+	}
+
+	// tags is never nil: a nil slice would be encoded as null in JSON. Empty
+	// entries such as the one in "a,,b" are dropped.
+	tags := make([]string, 0)
+	for _, tagStr := range strings.Split(tagsStr, ",") {
+		if tag := strings.TrimSpace(tagStr); tag != "" {
+			tags = append(tags, tag)
+		}
+	}
+
+	dateStr, err := md.GetProperty(pc, "date")
+	if err != nil {
+		if errors.Is(err, md.PropertyNotFoundError) {
+			return nil, errors.New("date property not found")
+		}
+
+		return nil, err
+	}
+	releaseDate, err := time.Parse(time.DateOnly, dateStr)
+	if err != nil {
+		return nil, fmt.Errorf("invalid date property '%s': %w", dateStr, err)
+	}
+
+	var modificationDate *time.Time
+	modDateStr, err := md.GetProperty(pc, "mod-date")
+	switch {
+	case err == nil:
+		modDate, err := time.Parse(time.DateOnly, modDateStr)
+		if err != nil {
+			return nil, fmt.Errorf("invalid mod-date property '%s': %w", modDateStr, err)
+		}
+
+		modificationDate = &modDate
+	case !errors.Is(err, md.PropertyNotFoundError):
+		return nil, err
+	}
+
+	return &Article{
+		ArticleProperties: ArticleProperties{
+			Id:               -1,
+			Title:            title,
+			Status:           ArticleStatusDraft,
+			Tags:             tags,
+			ReleaseDate:      releaseDate,
+			ModificationDate: modificationDate,
+		},
+		Content: string(html),
+		Files:   files,
+	}, nil
+}
+
+// readTarFiles reads every entry of the tar archive into memory, keyed by its
+// name without a leading "./".
+func readTarFiles(reader io.Reader) (map[string][]byte, error) {
+	tarFiles := make(map[string][]byte)
+	tarReader := tar.NewReader(reader)
+	for {
+		header, err := tarReader.Next()
+		if err == io.EOF {
+			return tarFiles, nil
+		}
+		if err != nil {
+			return nil, err
+		}
+
+		content, err := io.ReadAll(tarReader)
+		if err != nil {
+			return nil, err
+		}
+
+		// Entry names may carry a leading "./" (archives built from "."); it is
+		// stripped so that lookups by relative path match.
+		tarFiles[strings.TrimPrefix(header.Name, "./")] = content
+	}
+}
diff --git a/backend/go.work b/backend/go.work
new file mode 100644
index 0000000..c2e06e8
--- /dev/null
+++ b/backend/go.work
@@ -0,0 +1,7 @@
+go 1.25
+
+use (
+	.
+	go-sqlite3
+	goldmark
+)
diff --git a/backend/md/parser.go b/backend/md/parser.go
new file mode 100644
index 0000000..eda0ec3
--- /dev/null
+++ b/backend/md/parser.go
@@ -0,0 +1,62 @@
+package md
+
+import (
+	"bytes"
+	"errors"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+)
+
+// PropertyNotFoundError is returned by GetProperty when the document does not
+// define the requested property.
+var PropertyNotFoundError = errors.New("property not found")
+
+// Parse renders the markdown source to HTML.
+//
+// Link and image destinations are passed through transformer as described on
+// UrlTransformer. The returned context carries the properties of the document
+// and is meant to be queried with GetProperty.
+func Parse(transformer UrlTransformer, source []byte) (parser.Context, []byte, error) {
+	converter := goldmark.New(
+		goldmark.WithExtensions(
+			extension.Footnote,
+			extension.Strikethrough,
+			extension.Table,
+			&propertiesExtension{},
+			&urlTransformerExtension{transformer},
+		),
+	)
+
+	pc := parser.NewContext()
+	var out bytes.Buffer
+	if err := converter.Convert(source, &out, parser.WithContext(pc)); err != nil {
+		return nil, nil, err
+	}
+
+	return pc, out.Bytes(), nil
+}
+
+// GetProperty returns the value of the property key from a context returned by
+// Parse.
+//
+// It returns PropertyNotFoundError when the document has no property block or
+// the block does not define key, and the parse error when the property block
+// is malformed.
+func GetProperty(pc parser.Context, key string) (string, error) {
+	data, ok := pc.Get(propertiesContextKey).(*propertiesData)
+	if !ok || data == nil {
+		return "", PropertyNotFoundError
+	}
+	if data.Error != nil {
+		return "", data.Error
+	}
+
+	value, ok := data.properties[key]
+	if !ok {
+		return "", PropertyNotFoundError
+	}
+
+	return value, nil
+}
diff --git a/backend/md/properties.go b/backend/md/properties.go
new file mode 100644
index 0000000..4142f11
--- /dev/null
+++ b/backend/md/properties.go
@@ -0,0 +1,128 @@
+package md
+
+import (
+	"errors"
+	"strings"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
+)
+
+// propertiesContextKey is the parser context key under which the parsed
+// property block is stored.
+var propertiesContextKey = parser.NewContextKey()
+
+// propertiesData is the result of parsing the property block of a document.
+type propertiesData struct {
+	properties map[string]string
+	// Error is set when a line of the block is not a "key: value" pair.
+	Error error
+}
+
+// propertiesExtension registers the block parser for the property block.
+type propertiesExtension struct {
+}
+
+// Extend implements goldmark.Extender.
+func (e *propertiesExtension) Extend(m goldmark.Markdown) {
+	m.Parser().AddOptions(
+		parser.WithBlockParsers(
+			util.Prioritized(&propertiesParser{}, 0),
+		),
+	)
+}
+
+// propertiesParser parses a property block at the very start of a document:
+// "key: value" lines enclosed by two lines that consist of dashes only.
+type propertiesParser struct {
+}
+
+// Trigger implements parser.BlockParser. The block starts with a dash.
+func (p *propertiesParser) Trigger() []byte {
+	return []byte{'-'}
+}
+
+// Open implements parser.BlockParser. It opens the block only when the first
+// line of the document is a separator.
+func (p *propertiesParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
+	if lineNumber, _ := reader.Position(); lineNumber != 0 {
+		return nil, parser.NoChildren
+	}
+
+	if line, _ := reader.PeekLine(); !isSeparator(line) {
+		return nil, parser.NoChildren
+	}
+
+	return ast.NewTextBlock(), parser.NoChildren
+}
+
+// Continue implements parser.BlockParser. It collects lines until the closing
+// separator, which is consumed as part of the block.
+func (p *propertiesParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
+	line, segment := reader.PeekLine()
+	if isSeparator(line) {
+		reader.Advance(segment.Len())
+		return parser.Close
+	}
+
+	node.Lines().Append(segment)
+	return parser.Continue | parser.NoChildren
+}
+
+// Close implements parser.BlockParser. It parses the collected lines into the
+// properties stored in the parser context and removes the block from the
+// document so that it is not rendered.
+func (p *propertiesParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {
+	data := &propertiesData{properties: make(map[string]string)}
+	pc.Set(propertiesContextKey, data)
+
+	lines := node.Lines()
+	for i := 0; i < lines.Len(); i++ {
+		segment := lines.At(i)
+		line := strings.TrimSpace(string(segment.Value(reader.Source())))
+		if line == "" {
+			// Blank lines between properties carry no data.
+			continue
+		}
+
+		key, value, ok := strings.Cut(line, ":")
+		if !ok {
+			data.Error = errors.New("invalid property line")
+			break
+		}
+
+		data.properties[strings.TrimSpace(key)] = strings.TrimSpace(value)
+	}
+
+	node.Parent().RemoveChild(node.Parent(), node)
+}
+
+// CanInterruptParagraph implements parser.BlockParser.
+func (p *propertiesParser) CanInterruptParagraph() bool {
+	return false
+}
+
+// CanAcceptIndentedLine implements parser.BlockParser.
+func (p *propertiesParser) CanAcceptIndentedLine() bool {
+	return false
+}
+
+// isSeparator reports whether line, ignoring surrounding whitespace, is
+// non-empty and consists of dashes only.
+func isSeparator(line []byte) bool {
+	line = util.TrimRightSpace(util.TrimLeftSpace(line))
+	if len(line) == 0 {
+		return false
+	}
+
+	for _, c := range line {
+		if c != '-' {
+			return false
+		}
+	}
+
+	return true
+}
diff --git a/backend/md/urls.go b/backend/md/urls.go
new file mode 100644
index 0000000..d16461f
--- /dev/null
+++ b/backend/md/urls.go
@@ -0,0 +1,142 @@
+package md
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/renderer"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/util"
+)
+
+// UrlTransformer maps the destination of a link or image that points into the
+// article itself to the URL that is written to the rendered HTML.
+type UrlTransformer = func(string) (string, error)
+
+// urlTransformerExtension installs urlRenderer on a goldmark instance.
+type urlTransformerExtension struct {
+	transformer UrlTransformer
+}
+
+// Extend implements goldmark.Extender.
+func (e *urlTransformerExtension) Extend(m goldmark.Markdown) {
+	m.Renderer().AddOptions(renderer.WithNodeRenderers(
+		util.Prioritized(&urlRenderer{e.transformer}, 500),
+	))
+}
+
+// urlRenderer renders links and images with transformed destinations.
+type urlRenderer struct {
+	transformer UrlTransformer
+}
+
+// RegisterFuncs implements renderer.NodeRenderer.
+func (r *urlRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+	reg.Register(ast.KindImage, r.renderImage)
+	reg.Register(ast.KindLink, r.renderLink)
+}
+
+// renderImage writes an <img> element whose src is the resolved destination
+// and whose alt text is the plain text of the image's children.
+func (r *urlRenderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+
+	n := node.(*ast.Image)
+
+	url, err := r.resolveUrl(string(n.Destination))
+	if err != nil {
+		return ast.WalkStop, err
+	}
+
+	_, _ = w.WriteString(`<img src="`)
+	_, _ = w.Write(util.EscapeHTML([]byte(url)))
+	_, _ = w.WriteString(`" alt="`)
+	_, _ = w.Write(nodeToHTMLText(n, source))
+	_ = w.WriteByte('"')
+	if n.Title != nil {
+		_, _ = w.WriteString(` title="`)
+		_, _ = w.Write(util.EscapeHTML(n.Title))
+		_ = w.WriteByte('"')
+	}
+	if n.Attributes() != nil {
+		html.RenderAttributes(w, n, html.ImageAttributeFilter)
+	}
+	_, _ = w.WriteString(">")
+	return ast.WalkSkipChildren, nil
+}
+
+// renderLink writes the opening <a> tag with the resolved destination when the
+// link is entered and the closing tag when it is left. The link text is
+// rendered by the children in between; the title becomes the title attribute.
+func (r *urlRenderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		_, _ = w.WriteString("</a>")
+		return ast.WalkContinue, nil
+	}
+
+	n := node.(*ast.Link)
+
+	url, err := r.resolveUrl(string(n.Destination))
+	if err != nil {
+		return ast.WalkStop, err
+	}
+
+	_, _ = w.WriteString(`<a href="`)
+	_, _ = w.Write(util.EscapeHTML([]byte(url)))
+	_ = w.WriteByte('"')
+	if n.Title != nil {
+		_, _ = w.WriteString(` title="`)
+		_, _ = w.Write(util.EscapeHTML(n.Title))
+		_ = w.WriteByte('"')
+	}
+	_ = w.WriteByte('>')
+	return ast.WalkContinue, nil
+}
+
+// resolveUrl returns external URLs unchanged and passes every other
+// destination through the transformer.
+func (r *urlRenderer) resolveUrl(url string) (string, error) {
+	if isExternalUrl(url) {
+		return url, nil
+	}
+
+	return r.transformer(url)
+}
+
+// isExternalUrl reports whether url cannot refer to a file of the article: a
+// fragment of the current page or an http, https or mailto URL. The scheme is
+// matched case-insensitively.
+func isExternalUrl(url string) bool {
+	if strings.HasPrefix(url, "#") {
+		return true
+	}
+
+	lower := strings.ToLower(url)
+	for _, prefix := range []string{"http://", "https://", "mailto:"} {
+		if strings.HasPrefix(lower, prefix) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// nodeToHTMLText concatenates the text of the children of n. Code strings are
+// written as they are, other leaf nodes HTML-escaped; inner nodes are descended.
+func nodeToHTMLText(n ast.Node, source []byte) []byte {
+	var buf bytes.Buffer
+	for c := n.FirstChild(); c != nil; c = c.NextSibling() {
+		if s, ok := c.(*ast.String); ok && s.IsCode() {
+			buf.Write(s.Text(source))
+		} else if !c.HasChildren() {
+			buf.Write(util.EscapeHTML(c.Text(source)))
+		} else {
+			buf.Write(nodeToHTMLText(c, source))
+		}
+	}
+	return buf.Bytes()
+}