add parser for blog articles

Signed-off-by: Tobias Erbshäußer <tobias@tesoft.dev>
This commit is contained in:
2026-05-24 09:22:16 +02:00
parent 5147b61c9d
commit 333211d4d0
5 changed files with 427 additions and 0 deletions
+151
View File
@@ -0,0 +1,151 @@
package main
import (
"archive/tar"
"backend/md"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"strconv"
"strings"
"time"
)
// ArticleStatus identifies the state of an article. It is declared as an
// alias of int (not a distinct type), so its values are interchangeable with
// plain ints.
type ArticleStatus = int
// Article status values, numbered consecutively from 0 via iota.
const (
// ArticleStatusDraft (0) is the status ParseArticle assigns to every article
// it returns.
ArticleStatusDraft ArticleStatus = iota
// ArticleStatusPublished has the value 1.
ArticleStatusPublished
// ArticleStatusOffline has the value 2.
ArticleStatusOffline
)
// ArticleProperties holds the metadata of an article. The struct tags name
// the key each field uses when the value is encoded with encoding/json.
type ArticleProperties struct {
// Id is set to -1 by ParseArticle.
// NOTE(review): presumably replaced by a real identifier once the article is
// stored — confirm against the caller.
Id int64 `json:"id"`
// Title is taken from the "title" property of the article's README.md.
Title string `json:"title"`
// Status is one of the ArticleStatus* constants.
Status ArticleStatus `json:"status"`
// Tags is built by ParseArticle from the comma-separated "tags" property.
Tags []string `json:"tags"`
// ReleaseDate is parsed from the "date" property using time.DateOnly.
ReleaseDate time.Time `json:"date"`
// ModificationDate is parsed from the optional "mod-date" property and is
// nil when that property is absent.
ModificationDate *time.Time `json:"mod-date"`
}
// Article is a parsed article: its metadata, the rendered content and the
// files it references.
type Article struct {
// ArticleProperties is embedded, so its fields are promoted onto Article.
ArticleProperties
// Content is the output of md.Parse for the README.md, converted to a string.
Content string
// Files are the archive files that the README.md referenced during rendering.
Files []ArticleFile
}
// ArticleFile is a file from the article archive that the README.md refers to.
type ArticleFile struct {
// Id is the number ParseArticle assigned to the file, counting up from 0.
Id int
// Data is the raw content of the tar entry.
Data []byte
}
// ParseArticle reads a tar archive from reader and builds an Article from it.
//
// The archive must contain a README.md entry. The README is rendered with
// md.Parse; every non-http(s) URL it references is looked up in the archive
// (a leading "./" is ignored) and rewritten to filePrefix followed by the
// decimal Id of the matching ArticleFile. A file that is referenced several
// times is stored once and always resolves to the same URL.
//
// The README must define the properties "title" and "date" (YYYY-MM-DD);
// "tags" (comma separated) and "mod-date" (YYYY-MM-DD) are optional.
//
// The returned article has Id -1 and status ArticleStatusDraft. Files are
// ordered by Id, i.e. in order of first reference. An error is returned when
// the archive cannot be read, README.md or a referenced file is missing, or a
// property is missing or malformed.
func ParseArticle(reader io.Reader, filePrefix string) (*Article, error) {
	// Load the whole archive into memory, keyed by entry name.
	tarFiles := make(map[string][]byte)
	tarReader := tar.NewReader(reader)
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		content, err := io.ReadAll(tarReader)
		if err != nil {
			return nil, err
		}
		// Archives created from "." store entries as "./name"; normalise them
		// the same way referenced paths are normalised below.
		tarFiles[strings.TrimPrefix(header.Name, "./")] = content
	}

	readmeBytes, found := tarFiles["README.md"]
	if !found {
		return nil, errors.New("README.md not found")
	}

	// fileIds maps a normalised archive name to the Id already handed out for
	// it, so repeated references reuse one ArticleFile instead of creating
	// entries whose URLs no longer match any stored file.
	fileIds := make(map[string]int)
	files := make([]ArticleFile, 0)
	pc, rendered, err := md.Parse(func(path string) (string, error) {
		name := strings.TrimPrefix(path, "./")
		id, seen := fileIds[name]
		if !seen {
			content, ok := tarFiles[name]
			if !ok {
				return "", errors.New("file '" + path + "' not found")
			}
			id = len(files)
			fileIds[name] = id
			files = append(files, ArticleFile{Id: id, Data: content})
		}
		// The URL must carry the same Id that was stored with the file.
		return filePrefix + strconv.Itoa(id), nil
	}, readmeBytes)
	if err != nil {
		return nil, err
	}

	title, err := md.GetProperty(pc, "title")
	if err != nil {
		if errors.Is(err, md.PropertyNotFoundError) {
			return nil, errors.New("title property not found")
		}
		return nil, err
	}

	tagsStr, err := md.GetProperty(pc, "tags")
	if err != nil && !errors.Is(err, md.PropertyNotFoundError) {
		return nil, err
	}
	// Keep tags non-nil so it encodes as [] rather than null in JSON.
	tags := make([]string, 0)
	for _, raw := range strings.Split(tagsStr, ",") {
		// Skip empty items produced by a missing property or stray commas.
		if tag := strings.TrimSpace(raw); tag != "" {
			tags = append(tags, tag)
		}
	}

	dateStr, err := md.GetProperty(pc, "date")
	if err != nil {
		if errors.Is(err, md.PropertyNotFoundError) {
			return nil, errors.New("date property not found")
		}
		return nil, err
	}
	releaseDate, err := time.Parse(time.DateOnly, dateStr)
	if err != nil {
		return nil, fmt.Errorf("invalid date property '%s': %w", dateStr, err)
	}

	var modificationDate *time.Time
	modDateStr, err := md.GetProperty(pc, "mod-date")
	switch {
	case err == nil:
		parsed, err := time.Parse(time.DateOnly, modDateStr)
		if err != nil {
			return nil, fmt.Errorf("invalid mod-date property '%s': %w", modDateStr, err)
		}
		modificationDate = &parsed
	case !errors.Is(err, md.PropertyNotFoundError):
		return nil, err
	}

	return &Article{
		ArticleProperties: ArticleProperties{
			Id:               -1,
			Title:            title,
			Status:           ArticleStatusDraft,
			Tags:             tags,
			ReleaseDate:      releaseDate,
			ModificationDate: modificationDate,
		},
		Content: string(rendered),
		Files:   files,
	}, nil
}
+7
View File
@@ -0,0 +1,7 @@
go 1.25
use (
.
go-sqlite3
goldmark
)
+50
View File
@@ -0,0 +1,50 @@
package md
import (
"bytes"
"errors"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
)
// PropertyNotFoundError is returned by GetProperty when the parser context
// holds no properties data or the requested key is not among the parsed
// properties. Callers in this repository test for it with errors.Is.
var PropertyNotFoundError = errors.New("property not found")
// Parse renders the Markdown in source to HTML.
//
// The converter is set up with the footnote, strikethrough and table
// extensions plus the two extensions from this package: the properties
// extension and the URL transformer extension, which is handed transformer.
//
// It returns the parser context used for the conversion (pass it to
// GetProperty to read the parsed properties) together with the rendered HTML.
// When the conversion fails, the context and the HTML are nil and the error is
// returned as is.
func Parse(transformer UrlTransformer, source []byte) (parser.Context, []byte, error) {
	converter := goldmark.New(
		goldmark.WithExtensions(
			extension.Footnote,
			extension.Strikethrough,
			extension.Table,
			&propertiesExtension{},
			&urlTransformerExtension{transformer: transformer},
		),
		goldmark.WithRendererOptions(),
	)

	ctx := parser.NewContext()
	out := new(bytes.Buffer)
	if err := converter.Convert(source, out, parser.WithContext(ctx)); err != nil {
		return nil, nil, err
	}
	return ctx, out.Bytes(), nil
}
// GetProperty looks up key in the properties that were stored in pc while the
// document was parsed.
//
// If an error was recorded while the properties were parsed, that error is
// returned for every key. If no properties data is stored in pc, or key is not
// among the parsed properties, PropertyNotFoundError is returned.
func GetProperty(pc parser.Context, key string) (string, error) {
	stored := pc.Get(propertiesContextKey)
	if stored == nil {
		return "", PropertyNotFoundError
	}

	props := stored.(*propertiesData)
	if props.Error != nil {
		return "", props.Error
	}

	if value, found := props.properties[key]; found {
		return value, nil
	}
	return "", PropertyNotFoundError
}
+109
View File
@@ -0,0 +1,109 @@
package md
import (
"errors"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// propertiesContextKey is the parser context key under which
// propertiesParser.Close stores the *propertiesData and from which
// GetProperty reads it.
var propertiesContextKey = parser.NewContextKey()
// propertiesData is the result of parsing a properties block.
type propertiesData struct {
// properties maps each trimmed key to its trimmed value.
properties map[string]string
// Error is set when a property line contains no ':' separator; GetProperty
// returns it instead of a value.
Error error
}
// propertiesExtension is the goldmark extension that installs
// propertiesParser as a block parser (see its Extend method).
type propertiesExtension struct {
}
// Extend adds a propertiesParser with priority 0 to the block parsers of m.
func (e *propertiesExtension) Extend(m goldmark.Markdown) {
	blockParser := util.Prioritized(&propertiesParser{}, 0)
	m.Parser().AddOptions(parser.WithBlockParsers(blockParser))
}
// propertiesParser is a block parser for a "key: value" properties block that
// starts on the first line of the document with a separator line (see
// isSeparator) and ends at the next non-blank separator line.
type propertiesParser struct {
}
// Trigger returns the single byte '-', the character a line has to start with
// for this parser to be tried.
func (p *propertiesParser) Trigger() []byte {
	return []byte("-")
}
// Open starts a properties block when the reader is on line 0 of the document
// and that line is a separator (see isSeparator). In that case a new text
// block node is returned to collect the property lines; otherwise nil is
// returned. The state is always parser.NoChildren.
func (p *propertiesParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
	// A properties block is only recognised on the very first line.
	if lineNumber, _ := reader.Position(); lineNumber != 0 {
		return nil, parser.NoChildren
	}
	if firstLine, _ := reader.PeekLine(); !isSeparator(firstLine) {
		return nil, parser.NoChildren
	}
	return ast.NewTextBlock(), parser.NoChildren
}
// Continue handles the next line of an open properties block. A non-blank
// separator line is consumed and closes the block; any other line (including
// a blank one) is appended to node and the block stays open.
func (p *propertiesParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
	line, segment := reader.PeekLine()

	// isSeparator also accepts blank lines, so those are excluded explicitly.
	closing := !util.IsBlank(line) && isSeparator(line)
	if !closing {
		node.Lines().Append(segment)
		return parser.Continue | parser.NoChildren
	}

	reader.Advance(segment.Len())
	return parser.Close
}
// Close turns the lines collected in node into a propertiesData value, stores
// it in pc under propertiesContextKey and removes node from the tree.
//
// Each non-blank line must have the form "key: value"; key and value are
// trimmed of surrounding whitespace and the line is split at the first ':'.
// Blank lines are skipped. The first non-blank line without a ':' sets
// propertiesData.Error and stops processing of the remaining lines.
func (p *propertiesParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {
	data := &propertiesData{properties: make(map[string]string)}
	// Store the result up front so it is available even if parsing stops early.
	pc.Set(propertiesContextKey, data)

	lines := node.Lines()
	for i := 0; i < lines.Len(); i++ {
		segment := lines.At(i)
		line := strings.TrimSpace(string(segment.Value(reader.Source())))
		if line == "" {
			// Continue appends blank lines to the block; they carry no
			// property and must not be reported as invalid.
			continue
		}
		key, value, ok := strings.Cut(line, ":")
		if !ok {
			data.Error = errors.New("invalid property line")
			break
		}
		data.properties[strings.TrimSpace(key)] = strings.TrimSpace(value)
	}

	// The block only carries metadata; drop it from the tree.
	node.Parent().RemoveChild(node.Parent(), node)
}
// CanInterruptParagraph always reports false.
// NOTE(review): presumably this tells goldmark that a properties block never
// starts in the middle of a paragraph — confirm against the goldmark
// BlockParser documentation.
func (p *propertiesParser) CanInterruptParagraph() bool {
return false
}
// CanAcceptIndentedLine always reports false.
// NOTE(review): presumably this tells goldmark not to try this parser on
// indented lines — confirm against the goldmark BlockParser documentation.
func (p *propertiesParser) CanAcceptIndentedLine() bool {
return false
}
// isSeparator reports whether line, after util.TrimLeftSpace and
// util.TrimRightSpace, consists only of '-' characters. A line that is empty
// after trimming also counts as a separator, because there is no byte that
// could fail the check.
func isSeparator(line []byte) bool {
	trimmed := util.TrimRightSpace(util.TrimLeftSpace(line))
	for _, b := range trimmed {
		if b != '-' {
			return false
		}
	}
	return true
}
+110
View File
@@ -0,0 +1,110 @@
package md
import (
"bytes"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// UrlTransformer maps a URL found in the document to the URL that is written
// to the rendered HTML. It is only called for URLs that do not start with
// "http://" or "https://" (see urlRenderer.resolveUrl); a returned error
// aborts rendering.
type UrlTransformer = func(string) (string, error)
// urlTransformerExtension is the goldmark extension that installs a
// urlRenderer for images and links.
type urlTransformerExtension struct {
// transformer is handed to the urlRenderer created in Extend.
transformer UrlTransformer
}
// Extend registers a urlRenderer that uses the extension's transformer as a
// node renderer of m with priority 500.
func (e *urlTransformerExtension) Extend(m goldmark.Markdown) {
	nodeRenderer := &urlRenderer{transformer: e.transformer}
	m.Renderer().AddOptions(
		renderer.WithNodeRenderers(util.Prioritized(nodeRenderer, 500)),
	)
}
// urlRenderer renders image and link nodes, passing their destinations
// through resolveUrl before writing them to the output.
type urlRenderer struct {
// transformer rewrites every destination that is not an http(s) URL.
transformer UrlTransformer
}
// RegisterFuncs registers the render functions of r with reg: renderLink for
// link nodes and renderImage for image nodes.
func (r *urlRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	reg.Register(ast.KindLink, r.renderLink)
	reg.Register(ast.KindImage, r.renderImage)
}
// renderImage writes an <img> element for an image node.
//
// The destination is passed through resolveUrl; if that fails, rendering is
// stopped and the error is returned. The alt text is built from the node's
// children by nodeToHTMLText, which is why the children are skipped afterwards
// (ast.WalkSkipChildren). Nothing is written when the node is left
// (entering == false).
//
// The URL and the title are HTML-escaped before being written, so that quotes
// or angle brackets in them cannot terminate the attribute and inject markup.
func (r *urlRenderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		return ast.WalkContinue, nil
	}

	n := node.(*ast.Image)
	url, err := r.resolveUrl(string(n.Destination))
	if err != nil {
		return ast.WalkStop, err
	}

	// Write errors are ignored, as in goldmark's own renderers.
	// NOTE(review): assumes util.BufWriter reports failures on flush — confirm.
	_, _ = w.WriteString(`<img src="`)
	_, _ = w.Write(util.EscapeHTML([]byte(url)))
	_, _ = w.WriteString(`" alt="`)
	_, _ = w.Write(nodeToHTMLText(n, source))
	_ = w.WriteByte('"')
	if n.Title != nil {
		_, _ = w.WriteString(` title="`)
		_, _ = w.Write(util.EscapeHTML(n.Title))
		_ = w.WriteByte('"')
	}
	if n.Attributes() != nil {
		html.RenderAttributes(w, n, html.ImageAttributeFilter)
	}
	_, _ = w.WriteString(">")
	return ast.WalkSkipChildren, nil
}
// renderLink writes an <a> element for a link node.
//
// On entering, the destination is passed through resolveUrl (an error stops
// rendering and is returned) and the opening tag is written with the
// HTML-escaped URL, the optional title as a title attribute and any further
// attributes allowed by html.LinkAttributeFilter. The walk then continues into
// the children, so the link text is rendered by the regular node renderers.
// On leaving, the closing </a> tag is written.
func (r *urlRenderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if !entering {
		_, _ = w.WriteString("</a>")
		return ast.WalkContinue, nil
	}

	n := node.(*ast.Link)
	url, err := r.resolveUrl(string(n.Destination))
	if err != nil {
		return ast.WalkStop, err
	}

	// Write errors are ignored, as in goldmark's own renderers.
	// NOTE(review): assumes util.BufWriter reports failures on flush — confirm.
	_, _ = w.WriteString(`<a href="`)
	// Escape so that quotes in the URL cannot break out of the attribute.
	_, _ = w.Write(util.EscapeHTML([]byte(url)))
	_ = w.WriteByte('"')
	if n.Title != nil {
		// The Markdown link title belongs in the title attribute; the visible
		// link text comes from the child nodes.
		_, _ = w.WriteString(` title="`)
		_, _ = w.Write(util.EscapeHTML(n.Title))
		_ = w.WriteByte('"')
	}
	if n.Attributes() != nil {
		html.RenderAttributes(w, n, html.LinkAttributeFilter)
	}
	_ = w.WriteByte('>')
	return ast.WalkContinue, nil
}
// resolveUrl returns url unchanged when it starts with "http://" or
// "https://" (the comparison is case-sensitive). Every other URL is handed to
// the transformer, whose result and error are returned as is.
func (r *urlRenderer) resolveUrl(url string) (string, error) {
	for _, scheme := range []string{"http://", "https://"} {
		if strings.HasPrefix(url, scheme) {
			return url, nil
		}
	}
	return r.transformer(url)
}
// nodeToHTMLText concatenates the text of all descendants of n, read from
// source. Text of *ast.String children flagged as code is copied verbatim,
// text of other childless nodes is HTML-escaped, and nodes with children are
// processed recursively.
func nodeToHTMLText(n ast.Node, source []byte) []byte {
	var out bytes.Buffer
	child := n.FirstChild()
	for child != nil {
		str, isString := child.(*ast.String)
		switch {
		case isString && str.IsCode():
			out.Write(str.Text(source))
		case !child.HasChildren():
			out.Write(util.EscapeHTML(child.Text(source)))
		default:
			out.Write(nodeToHTMLText(child, source))
		}
		child = child.NextSibling()
	}
	return out.Bytes()
}