From 9c3fbb4dc03b99da640b82366d37553e2418139d Mon Sep 17 00:00:00 2001 From: Zibbp Date: Sat, 11 May 2024 23:28:30 +0000 Subject: [PATCH 1/4] fix(archive/chat): rework live chat to tdl chat parsing Attempt to correct invalid emote positions from chat-downloader --- internal/transport/http/archive.go | 60 +++++ internal/transport/http/handler.go | 2 + internal/utils/chat.go | 382 ----------------------------- internal/utils/live_chat.go | 74 ++++++ internal/utils/tdl.go | 348 ++++++++++++++++++++++++++ 5 files changed, 484 insertions(+), 382 deletions(-) delete mode 100644 internal/utils/chat.go create mode 100644 internal/utils/live_chat.go create mode 100644 internal/utils/tdl.go diff --git a/internal/transport/http/archive.go b/internal/transport/http/archive.go index 6f956784..0c1f992c 100644 --- a/internal/transport/http/archive.go +++ b/internal/transport/http/archive.go @@ -2,7 +2,9 @@ package http import ( "net/http" + "time" + "github.com/google/uuid" "github.com/labstack/echo/v4" "github.com/zibbp/ganymede/ent" "github.com/zibbp/ganymede/internal/archive" @@ -79,3 +81,61 @@ func (h *Handler) ArchiveTwitchVod(c echo.Context) error { } return c.JSON(http.StatusOK, vod) } + +// RestartTask godoc +// +// @Summary Restart a task +// @Description Restart a task +// @Tags archive +// @Accept json +// @Produce json +// @Param queue_id path string true "Queue ID" +// @Param task body RestartTaskRequest true "Task" +// @Success 200 +// @Failure 400 {object} utils.ErrorResponse +// @Failure 500 {object} utils.ErrorResponse +// @Router /archive/restart [post] +// @Security ApiKeyCookieAuth +func (h *Handler) RestartTask(c echo.Context) error { + rtr := new(RestartTaskRequest) + if err := c.Bind(rtr); err != nil { + return echo.NewHTTPError(http.StatusBadRequest, err.Error()) + } + if err := c.Validate(rtr); err != nil { + return echo.NewHTTPError(http.StatusBadRequest, err.Error()) + } + qUUID, err := uuid.Parse(rtr.QueueID) + if err != nil { + return 
echo.NewHTTPError(http.StatusBadRequest, err.Error()) + } + + err = h.Service.ArchiveService.RestartTask(c, qUUID, rtr.Task, rtr.Cont) + if err != nil { + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) + } + + return c.NoContent(http.StatusOK) +} + +// debug route to test converting chat files +func (h *Handler) ConvertTwitchChat(c echo.Context) error { + type Body struct { + LiveChatPath string `json:"live_chat_path"` + ChannelName string `json:"channel_name"` + VideoID string `json:"video_id"` + VideoExternalID string `json:"video_external_id"` + ChannelID int `json:"channel_id"` + PreviousVideoID string `json:"previous_video_id"` + } + body := new(Body) + if err := c.Bind(body); err != nil { + return echo.NewHTTPError(http.StatusBadRequest, err.Error()) + } + + err := utils.ConvertTwitchLiveChatToTDLChat(body.LiveChatPath, body.ChannelName, body.VideoID, body.VideoExternalID, body.ChannelID, time.Now(), body.PreviousVideoID) + if err != nil { + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) + } + + return c.NoContent(http.StatusOK) +} diff --git a/internal/transport/http/handler.go b/internal/transport/http/handler.go index 19d84394..eb008259 100644 --- a/internal/transport/http/handler.go +++ b/internal/transport/http/handler.go @@ -205,6 +205,8 @@ func groupV1Routes(e *echo.Group, h *Handler) { archiveGroup := e.Group("/archive") archiveGroup.POST("/channel", h.ArchiveTwitchChannel, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) archiveGroup.POST("/vod", h.ArchiveTwitchVod, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) + archiveGroup.POST("/restart", h.RestartTask, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) + archiveGroup.POST("/convert-twitch-live-chat", h.ConvertTwitchChat, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.AdminRole)) // Admin 
adminGroup := e.Group("/admin") diff --git a/internal/utils/chat.go b/internal/utils/chat.go deleted file mode 100644 index 18f45e93..00000000 --- a/internal/utils/chat.go +++ /dev/null @@ -1,382 +0,0 @@ -package utils - -import ( - "encoding/json" - "fmt" - "io" - "os" - "sort" - "strconv" - "strings" - "time" - - "github.com/rs/zerolog/log" -) - -type ParsedChat struct { - Streamer Streamer `json:"streamer"` - Video Video `json:"video"` - Comments []Comment `json:"comments"` -} - -type Streamer struct { - Name string `json:"name"` - ID int `json:"id"` -} - -type Video struct { - ID string `json:"id"` - Start int64 `json:"start"` - End int64 `json:"end"` -} - -type Comment struct { - ID string `json:"_id"` - Source string `json:"source"` - ContentOffsetSeconds float64 `json:"content_offset_seconds"` - Commenter Commenter `json:"commenter"` - Message Message `json:"message"` -} - -type Commenter struct { - DisplayName string `json:"display_name"` - ID string `json:"id"` - IsModerator bool `json:"is_moderator"` - IsSubscriber bool `json:"is_subscriber"` - IsTurbo bool `json:"is_turbo"` - Name string `json:"name"` -} - -type Message struct { - Body string `json:"body"` - BitsSpent int `json:"bits_spent"` - Fragments []Fragment `json:"fragments"` - UserBadges []UserBadge `json:"user_badges"` - UserColor string `json:"user_color"` - UserNoticeParams UserNoticParams `json:"user_notice_params"` -} - -type Fragment struct { - Text string `json:"text"` - Emoticon *Emoticon `json:"emoticon"` - Pos1 int `json:"pos1"` - Pos2 int `json:"pos2"` -} - -type UserBadge struct { - ID string `json:"_id"` - Version string `json:"version"` -} - -type UserNoticParams struct { - MsgID *string `json:"msg-id"` -} - -type Emoticon struct { - EmoticonID string `json:"emoticon_id"` - EmoticonSetID string `json:"emoticon_set_id"` -} - -type LiveChat struct { - Comments []LiveComment `json:"comments"` -} - -type LiveComment struct { - ActionType string `json:"action_type"` - Author struct { - 
Badges []struct { - ClickAction string `json:"click_action"` - ClickURL string `json:"click_url"` - Description string `json:"description"` - Icons []struct { - Height int `json:"height"` - ID string `json:"id"` - URL string `json:"url"` - Width int `json:"width"` - } `json:"icons"` - ID string `json:"id"` - Name string `json:"name"` - Title string `json:"title"` - Version interface{} `json:"version"` - } `json:"badges"` - DisplayName string `json:"display_name"` - ID string `json:"id"` - IsModerator bool `json:"is_moderator"` - IsSubscriber bool `json:"is_subscriber"` - IsTurbo bool `json:"is_turbo"` - Name string `json:"name"` - } `json:"author"` - ChannelID string `json:"channel_id"` - ClientNonce string `json:"client_nonce"` - Colour string `json:"colour"` - Emotes []struct { - ID string `json:"id"` - Images []struct { - Height int `json:"height"` - ID string `json:"id"` - URL string `json:"url"` - Width int `json:"width"` - } `json:"images"` - Locations []string `json:"locations"` - Name string `json:"name"` - } `json:"emotes"` - Flags string `json:"flags"` - IsFirstMessage bool `json:"is_first_message"` - Message string `json:"message"` - MessageID string `json:"message_id"` - MessageType string `json:"message_type"` - ReturningChatter string `json:"returning_chatter"` - Timestamp int64 `json:"timestamp"` - UserType string `json:"user_type"` -} - -func OpenChatFile(path string) ([]LiveComment, error) { - - liveChatJsonFile, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("failed to open chat file: %v", err) - } - defer liveChatJsonFile.Close() - byteValue, _ := io.ReadAll(liveChatJsonFile) - - var liveComments []LiveComment - err = json.Unmarshal(byteValue, &liveComments) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal chat file: %v", err) - } - return liveComments, nil -} - -func ConvertTwitchLiveChatToVodChat(path string, channelName string, vID string, vExtID string, cID int, chatStart time.Time, previousVideoID string) 
error { - - log.Debug().Msg("Converting Twitch Live Chat to Vod Chat") - - liveComments, err := OpenChatFile(path) - if err != nil { - return err - } - - // BEGIN CONVERSION LIVE -> PARSED - var parsedChat ParsedChat - parsedChat.Streamer.Name = channelName - parsedChat.Streamer.ID = cID - parsedChat.Video.ID = previousVideoID - parsedChat.Video.Start = 0 - - var parsedComments []Comment - - // Create an initial comment to mark the start of the chat session - initialComment := Comment{ - ID: "546a5e6e-c820-4ad2-9421-9ba5b5bf37ea", - Source: "chat", - ContentOffsetSeconds: 0, - Commenter: Commenter{ - DisplayName: "Ganymede", - ID: "222777213", - IsModerator: false, - IsSubscriber: false, - IsTurbo: false, - Name: "ganymede", - }, - Message: Message{ - Body: "Initial chat message", - BitsSpent: 0, - Fragments: []Fragment{ - { - Text: "Initial chat message", - Emoticon: nil, - Pos1: 0, - Pos2: 0, - }, - }, - UserBadges: []UserBadge{}, - UserColor: "#a65ee8", - UserNoticeParams: UserNoticParams{ - MsgID: nil, - }, - }, - } - parsedComments = append(parsedComments, initialComment) - - for _, liveComment := range liveComments { - // Check if comment is empty - if liveComment.Message == "" { - continue - } - - var parsedComment Comment - - // Get offset in seconds - liveCommentUnix, err := microSecondToMillisecondUnix(liveComment.Timestamp) - if err != nil { - return fmt.Errorf("failed to convert live comment timestamp: %v", err) - } - // Use chat start time to get offset in seconds - diff := liveCommentUnix.Sub(chatStart) - parsedComment.ContentOffsetSeconds = diff.Seconds() - - parsedComment.ID = liveComment.MessageID - parsedComment.Source = "chat" - parsedComment.Commenter.DisplayName = liveComment.Author.DisplayName - parsedComment.Commenter.ID = liveComment.Author.ID - parsedComment.Commenter.IsModerator = liveComment.Author.IsModerator - parsedComment.Commenter.IsSubscriber = liveComment.Author.IsSubscriber - parsedComment.Commenter.IsTurbo = 
liveComment.Author.IsTurbo - parsedComment.Commenter.Name = liveComment.Author.Name - - parsedComment.Message.Body = liveComment.Message - parsedComment.Message.BitsSpent = 0 - firstFragment := Fragment{ - Text: liveComment.Message, - Emoticon: nil, - } - parsedComment.Message.Fragments = append(parsedComment.Message.Fragments, firstFragment) - parsedComment.Message.UserBadges = []UserBadge{} - parsedComment.Message.UserColor = liveComment.Colour - parsedComment.Message.UserNoticeParams = UserNoticParams{ - MsgID: nil, - } - - var emoteFragments []Fragment - - // Extract emotes and create fragments with positions - if liveComment.Emotes != nil { - for _, liveEmote := range liveComment.Emotes { - for _, liveEmoteLocation := range liveEmote.Locations { - var emoteFragment Fragment - var emoticonFragment Emoticon - emoteFragment.Emoticon = &emoticonFragment - - // Get position of emote in message - emotePositions := strings.Split(liveEmoteLocation, "-") - - pos1, err := strconv.Atoi(emotePositions[0]) - if err != nil { - return fmt.Errorf("failed to convert emote position: %v", err) - } - pos2, err := strconv.Atoi(emotePositions[1]) - if err != nil { - return fmt.Errorf("failed to convert emote position: %v", err) - } - - emoteFragment.Pos1 = pos1 - emoteFragment.Pos2 = pos2 + 1 - - if pos2+1 > len(liveComment.Message) { - log.Debug().Msgf("Message: %s -- has an out-of-bounds emote position, skipping.", liveComment.Message) - } else { - slicedEmote := liveComment.Message[pos1 : pos2+1] - emoteFragment.Text = slicedEmote - emoteFragment.Emoticon.EmoticonID = liveEmote.ID - emoteFragment.Emoticon.EmoticonSetID = "" - - emoteFragments = append(emoteFragments, emoteFragment) - - } - - } - } - } - - // Sort emoteFragments by position ascending - sort.Slice(emoteFragments, func(i, j int) bool { - return emoteFragments[i].Pos1 < emoteFragments[j].Pos1 - }) - - var formattedEmoteFragments []Fragment - - // Remove emote fragments from message fragments - for i, emoteFragment 
:= range emoteFragments { - if i == 0 { - fragmentText := parsedComment.Message.Body[:emoteFragment.Pos1] - fragment := Fragment{ - Text: fragmentText, - Emoticon: nil, - } - formattedEmoteFragments = append(formattedEmoteFragments, fragment) - formattedEmoteFragments = append(formattedEmoteFragments, emoteFragment) - } else { - fragmentText := parsedComment.Message.Body[emoteFragments[i-1].Pos2:emoteFragment.Pos1] - fragment := Fragment{ - Text: fragmentText, - Emoticon: nil, - } - formattedEmoteFragments = append(formattedEmoteFragments, fragment) - formattedEmoteFragments = append(formattedEmoteFragments, emoteFragment) - } - } - - // Check if last fragment is an emoticon - if len(formattedEmoteFragments) > 0 { - lastItem := len(formattedEmoteFragments) - 1 - if formattedEmoteFragments[lastItem].Emoticon.EmoticonID != "" { - fragmentText := parsedComment.Message.Body[formattedEmoteFragments[lastItem].Pos2:] - fragment := Fragment{ - Text: fragmentText, - Emoticon: nil, - } - formattedEmoteFragments = append(formattedEmoteFragments, fragment) - } - } - - // If message has emote fragments - if len(formattedEmoteFragments) > 0 { - parsedComment.Message.Fragments = formattedEmoteFragments - - } - - // User badges - if (liveComment.Author.Badges != nil) && (len(liveComment.Author.Badges) > 0) { - for _, liveBadge := range liveComment.Author.Badges { - userBadge := UserBadge{ - ID: liveBadge.Name, - Version: fmt.Sprintf("%v", liveBadge.Version), - } - parsedComment.Message.UserBadges = append(parsedComment.Message.UserBadges, userBadge) - } - } - - // Some users don't have a display name color set - if parsedComment.Message.UserColor == "" { - parsedComment.Message.UserColor = "#a65ee8" - } - - // Push it - parsedComments = append(parsedComments, parsedComment) - } - - parsedChat.Comments = parsedComments - - // get last comment offset and set as video end - lastComment := parsedChat.Comments[len(parsedChat.Comments)-1] - parsedChat.Video.End = 
int64(lastComment.ContentOffsetSeconds) - - err = writeParsedChat(parsedChat, vID, vExtID) - if err != nil { - return err - } - return nil -} - -func writeParsedChat(parsedChat ParsedChat, vID string, vExtID string) error { - data, err := json.Marshal(parsedChat) - if err != nil { - return fmt.Errorf("failed to marshal parsed comments: %v", err) - } - err = os.WriteFile(fmt.Sprintf("/tmp/%s_%s-chat-convert.json", vExtID, vID), data, 0644) - if err != nil { - return fmt.Errorf("failed to write parsed comments: %v", err) - } - return nil -} - -func microSecondToMillisecondUnix(t int64) (time.Time, error) { - sT := strconv.FormatInt(t, 10) - fST := sT[:len(sT)-3] - iFST, err := strconv.ParseInt(fST, 10, 64) - if err != nil { - return time.Time{}, err - } - unixTimeUTC := time.Unix(iFST/int64(1000), (iFST%int64(1000))*int64(1000000)) - return unixTimeUTC, nil -} diff --git a/internal/utils/live_chat.go b/internal/utils/live_chat.go new file mode 100644 index 00000000..a59eed53 --- /dev/null +++ b/internal/utils/live_chat.go @@ -0,0 +1,74 @@ +package utils + +import ( + "encoding/json" + "fmt" + "io" + "os" +) + +type LiveComment struct { + ActionType string `json:"action_type"` + Author struct { + Badges []struct { + ClickAction string `json:"click_action"` + ClickURL string `json:"click_url"` + Description string `json:"description"` + Icons []struct { + Height int `json:"height"` + ID string `json:"id"` + URL string `json:"url"` + Width int `json:"width"` + } `json:"icons"` + ID string `json:"id"` + Name string `json:"name"` + Title string `json:"title"` + Version interface{} `json:"version"` + } `json:"badges"` + DisplayName string `json:"display_name"` + ID string `json:"id"` + IsModerator bool `json:"is_moderator"` + IsSubscriber bool `json:"is_subscriber"` + IsTurbo bool `json:"is_turbo"` + Name string `json:"name"` + } `json:"author"` + ChannelID string `json:"channel_id"` + ClientNonce string `json:"client_nonce"` + Colour string `json:"colour"` + Emotes 
[]struct { + ID string `json:"id"` + Images []struct { + Height int `json:"height"` + ID string `json:"id"` + URL string `json:"url"` + Width int `json:"width"` + } `json:"images"` + Locations []string `json:"locations"` + Name string `json:"name"` + } `json:"emotes"` + Flags string `json:"flags"` + IsFirstMessage bool `json:"is_first_message"` + Message string `json:"message"` + MessageID string `json:"message_id"` + MessageType string `json:"message_type"` + ReturningChatter string `json:"returning_chatter"` + Timestamp int64 `json:"timestamp"` + UserType string `json:"user_type"` +} + +func OpenLiveChatFile(path string) ([]LiveComment, error) { + + liveChatJsonFile, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open chat file: %v", err) + } + defer liveChatJsonFile.Close() + byteValue, _ := io.ReadAll(liveChatJsonFile) + + var liveComments []LiveComment + err = json.Unmarshal(byteValue, &liveComments) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal chat file: %v", err) + } + return liveComments, nil +} diff --git a/internal/utils/tdl.go b/internal/utils/tdl.go new file mode 100644 index 00000000..1aff6a04 --- /dev/null +++ b/internal/utils/tdl.go @@ -0,0 +1,348 @@ +package utils + +import ( + "encoding/json" + "fmt" + "os" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/rs/zerolog/log" +) + +type TDLChat struct { + Streamer Streamer `json:"streamer"` + Video Video `json:"video"` + Comments []Comment `json:"comments"` +} + +type Streamer struct { + Name string `json:"name"` + ID int `json:"id"` +} + +type Video struct { + ID string `json:"id"` + Start int64 `json:"start"` + End int64 `json:"end"` +} + +type Comment struct { + ID string `json:"_id"` + Source string `json:"source"` + ContentOffsetSeconds float64 `json:"content_offset_seconds"` + Commenter Commenter `json:"commenter"` + Message Message `json:"message"` +} + +type Commenter struct { + DisplayName string `json:"display_name"` + ID 
string `json:"id"` + IsModerator bool `json:"is_moderator"` + IsSubscriber bool `json:"is_subscriber"` + IsTurbo bool `json:"is_turbo"` + Name string `json:"name"` +} + +type Message struct { + Body string `json:"body"` + BitsSpent int `json:"bits_spent"` + Fragments []Fragment `json:"fragments"` + UserBadges []UserBadge `json:"user_badges"` + UserColor string `json:"user_color"` + UserNoticeParams UserNoticParams `json:"user_notice_params"` +} + +type Fragment struct { + Text string `json:"text"` + Emoticon *Emoticon `json:"emoticon"` + Pos1 int `json:"pos1"` + Pos2 int `json:"pos2"` +} + +type UserBadge struct { + ID string `json:"_id"` + Version string `json:"version"` +} + +type UserNoticParams struct { + MsgID *string `json:"msg-id"` +} + +type Emoticon struct { + EmoticonID string `json:"emoticon_id"` + EmoticonSetID string `json:"emoticon_set_id"` +} + +type LiveChat struct { + Comments []LiveComment `json:"comments"` +} + +func findSubstringPositions(input string, substring string, occurrenceNumber int) (start int, end int, found bool) { + re := regexp.MustCompile(regexp.QuoteMeta(substring)) + matches := re.FindAllStringIndex(input, -1) + + if occurrenceNumber <= len(matches) { + startIndex := matches[occurrenceNumber-1][0] + endIndex := matches[occurrenceNumber-1][1] + return startIndex, endIndex, true + } + + return -1, -1, false +} + +func ConvertTwitchLiveChatToTDLChat(path string, channelName string, videoID string, videoExternalID string, channelID int, chatStartTime time.Time, previousVideoID string) error { + + log.Debug().Str("chat_file", path).Msg("Converting live Twitch chat to TDL chat for rendering") + + liveComments, err := OpenLiveChatFile(path) + if err != nil { + return err + } + + tdlChat := TDLChat{} + + tdlChat.Streamer.Name = channelName + tdlChat.Streamer.ID = channelID + tdlChat.Video.ID = previousVideoID // we don't know the video (vod) id at this point + tdlChat.Video.Start = 0 + + tdlComments := []Comment{} + + // create an 
initial comment to mark the start of chat + tdlComments = append(tdlComments, Comment{ + ID: "546a5e6e-c820-4ad2-9421-9ba5b5bf37ea", + Source: "chat", + ContentOffsetSeconds: 0, + Commenter: Commenter{ + DisplayName: "Ganymede", + ID: "222777213", + IsModerator: false, + IsSubscriber: false, + IsTurbo: false, + Name: "ganymede", + }, + Message: Message{ + Body: "Initial chat message", + BitsSpent: 0, + Fragments: []Fragment{ + { + Text: "Initial chat message", + Emoticon: nil, + Pos1: 0, + Pos2: 0, + }, + }, + UserBadges: []UserBadge{}, + UserColor: "#a65ee8", + UserNoticeParams: UserNoticParams{ + MsgID: nil, + }, + }, + }) + + for _, liveComment := range liveComments { + if liveComment.Message == "" { + continue + } + + // get offset in seconds + liveCommentUnix, err := microSecondToMillisecondUnix(liveComment.Timestamp) + if err != nil { + return fmt.Errorf("failed to convert live comment timestamp: %v", err) + } + + // use chat start time to get offset in seconds + diff := liveCommentUnix.Sub(chatStartTime) + + // populate static variables + tdlComment := Comment{ + ContentOffsetSeconds: diff.Seconds(), + ID: liveComment.MessageID, + Source: "chat", + Commenter: Commenter{ + ID: liveComment.Author.ID, + DisplayName: liveComment.Author.DisplayName, + Name: liveComment.Author.Name, + IsModerator: liveComment.Author.IsModerator, + IsSubscriber: liveComment.Author.IsSubscriber, + IsTurbo: liveComment.Author.IsTurbo, + }, + Message: Message{ + Body: liveComment.Message, + BitsSpent: 0, + UserBadges: []UserBadge{}, + UserColor: liveComment.Colour, + UserNoticeParams: UserNoticParams{ + MsgID: nil, + }, + }, + } + + // create the first message fragment + tdlComment.Message.Fragments = append(tdlComment.Message.Fragments, Fragment{ + Text: liveComment.Message, + Emoticon: nil, + }) + + // parse emotes, creating fragments with positions + emoteFragments := []Fragment{} + if liveComment.Emotes != nil { + for _, liveCommentEmote := range liveComment.Emotes { + for i, 
liveCommentEmoteLocation := range liveCommentEmote.Locations { + var pos1, pos2 int + var emoteFragment Fragment + // get position of emote in message + emotePositions := strings.Split(liveCommentEmoteLocation, "-") + pos1, err := strconv.Atoi(emotePositions[0]) + if err != nil { + return fmt.Errorf("failed to convert emote position: %v", err) + } + pos2 = pos1 + len(liveCommentEmote.Name) + + slicedEmote := liveComment.Message[pos1:pos2] + + // ensure that the sliced string equals the emote + // sometimes the output of chat-downloader will not include a unicode character when calculating positions causing an offset in positions + if slicedEmote != liveCommentEmote.Name { + log.Debug().Str("message_id", liveComment.MessageID).Msg("emote position mismatch detected while converting chat") + + // attempt to get emote position in comment message + pos1, pos2, found := findSubstringPositions(liveComment.Message, liveCommentEmote.Name, i+1) + if !found { + log.Warn().Str("message_id", liveComment.MessageID).Msg("unable to extract emote positions from message, skpping") + continue + } + slicedEmote = liveComment.Message[pos1:pos2] + emoteFragment = Fragment{ + Pos1: pos1, + Pos2: pos2, + Text: slicedEmote, + Emoticon: &Emoticon{ + EmoticonID: liveCommentEmote.ID, + EmoticonSetID: "", + }, + } + } else { + emoteFragment = Fragment{ + Pos1: pos1, + Pos2: pos2, + Text: slicedEmote, + Emoticon: &Emoticon{ + EmoticonID: liveCommentEmote.ID, + EmoticonSetID: "", + }, + } + } + + emoteFragments = append(emoteFragments, emoteFragment) + } + } + } + + // sort emote fragments by position ascending + sort.Slice(emoteFragments, func(i, j int) bool { + return emoteFragments[i].Pos1 < emoteFragments[j].Pos1 + }) + + formattedEmoteFragments := []Fragment{} + + // remove emote fragments from message fragments + for i, emoteFragment := range emoteFragments { + if i == 0 { + fragmentText := tdlComment.Message.Body[:emoteFragment.Pos1] + fragment := Fragment{ + Text: fragmentText, + 
Emoticon: nil, + } + formattedEmoteFragments = append(formattedEmoteFragments, fragment) + formattedEmoteFragments = append(formattedEmoteFragments, emoteFragment) + } else { + fragmentText := tdlComment.Message.Body[emoteFragments[i-1].Pos2:emoteFragment.Pos1] + fragment := Fragment{ + Text: fragmentText, + Emoticon: nil, + } + formattedEmoteFragments = append(formattedEmoteFragments, fragment) + formattedEmoteFragments = append(formattedEmoteFragments, emoteFragment) + } + } + + // check if last fragment is an emoticon + if len(formattedEmoteFragments) > 0 { + lastItem := len(formattedEmoteFragments) - 1 + if formattedEmoteFragments[lastItem].Emoticon.EmoticonID != "" { + fragmentText := tdlComment.Message.Body[formattedEmoteFragments[lastItem].Pos2:] + fragment := Fragment{ + Text: fragmentText, + Emoticon: nil, + } + formattedEmoteFragments = append(formattedEmoteFragments, fragment) + } + } + + // ensure message has emote fragments + if len(formattedEmoteFragments) > 0 { + tdlComment.Message.Fragments = formattedEmoteFragments + } + + // user badges + if (liveComment.Author.Badges != nil) && (len(liveComment.Author.Badges) > 0) { + for _, liveCommentBadge := range liveComment.Author.Badges { + liveCommentUserBadge := UserBadge{ + ID: liveCommentBadge.Name, + Version: fmt.Sprintf("%v", liveCommentBadge.Version), + } + tdlComment.Message.UserBadges = append(tdlComment.Message.UserBadges, liveCommentUserBadge) + } + } + + // ensure user has a display name color + if tdlComment.Message.UserColor == "" { + tdlComment.Message.UserColor = "#a65ee8" + } + + tdlComments = append(tdlComments, tdlComment) + } + + tdlChat.Comments = tdlComments + + // get last comment offset and set as video end + lastComment := tdlChat.Comments[len(tdlChat.Comments)-1] + tdlChat.Video.End = int64(lastComment.ContentOffsetSeconds) + + // write chat + err = writeTDLChat(tdlChat, videoID, videoExternalID) + if err != nil { + return err + } + + return nil + +} + +func writeTDLChat(parsedChat 
TDLChat, vID string, vExtID string) error { + data, err := json.Marshal(parsedChat) + if err != nil { + return fmt.Errorf("failed to marshal parsed comments: %v", err) + } + err = os.WriteFile(fmt.Sprintf("/tmp/%s_%s-chat-convert.json", vExtID, vID), data, 0644) + if err != nil { + return fmt.Errorf("failed to write parsed comments: %v", err) + } + return nil +} + +func microSecondToMillisecondUnix(t int64) (time.Time, error) { + sT := strconv.FormatInt(t, 10) + fST := sT[:len(sT)-3] + iFST, err := strconv.ParseInt(fST, 10, 64) + if err != nil { + return time.Time{}, err + } + unixTimeUTC := time.Unix(iFST/int64(1000), (iFST%int64(1000))*int64(1000000)) + return unixTimeUTC, nil +} From 4bf9ebc0cfb58825ccabf39e550fbc86b22271e4 Mon Sep 17 00:00:00 2001 From: Zibbp Date: Sat, 11 May 2024 23:32:08 +0000 Subject: [PATCH 2/4] ref(utils): move generic function to generic file --- internal/utils/chat.go | 17 +++++++++++++++++ internal/utils/tdl.go | 14 -------------- 2 files changed, 17 insertions(+), 14 deletions(-) create mode 100644 internal/utils/chat.go diff --git a/internal/utils/chat.go b/internal/utils/chat.go new file mode 100644 index 00000000..21afccbd --- /dev/null +++ b/internal/utils/chat.go @@ -0,0 +1,17 @@ +package utils + +import "regexp" + +// find the substring position in a string. 
Supports passing an occurrence to find the Nth place of the substring in the string +func findSubstringPositions(input string, substring string, occurrenceNumber int) (start int, end int, found bool) { + re := regexp.MustCompile(regexp.QuoteMeta(substring)) + matches := re.FindAllStringIndex(input, -1) + + if occurrenceNumber <= len(matches) { + startIndex := matches[occurrenceNumber-1][0] + endIndex := matches[occurrenceNumber-1][1] + return startIndex, endIndex, true + } + + return -1, -1, false +} diff --git a/internal/utils/tdl.go b/internal/utils/tdl.go index 1aff6a04..f6882912 100644 --- a/internal/utils/tdl.go +++ b/internal/utils/tdl.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "os" - "regexp" "sort" "strconv" "strings" @@ -81,19 +80,6 @@ type LiveChat struct { Comments []LiveComment `json:"comments"` } -func findSubstringPositions(input string, substring string, occurrenceNumber int) (start int, end int, found bool) { - re := regexp.MustCompile(regexp.QuoteMeta(substring)) - matches := re.FindAllStringIndex(input, -1) - - if occurrenceNumber <= len(matches) { - startIndex := matches[occurrenceNumber-1][0] - endIndex := matches[occurrenceNumber-1][1] - return startIndex, endIndex, true - } - - return -1, -1, false -} - func ConvertTwitchLiveChatToTDLChat(path string, channelName string, videoID string, videoExternalID string, channelID int, chatStartTime time.Time, previousVideoID string) error { log.Debug().Str("chat_file", path).Msg("Converting live Twitch chat to TDL chat for rendering") From 66bfe0a55c7785326dfc45ac986a901a3890fc9d Mon Sep 17 00:00:00 2001 From: Zibbp Date: Mon, 27 May 2024 15:21:51 +0000 Subject: [PATCH 3/4] fixes from merge --- internal/activities/video.go | 2 +- internal/transport/http/archive.go | 36 ------------------------------ internal/transport/http/handler.go | 1 - 3 files changed, 1 insertion(+), 38 deletions(-) diff --git a/internal/activities/video.go b/internal/activities/video.go index 3837bcbc..c5444ca0 100644 --- 
a/internal/activities/video.go +++ b/internal/activities/video.go @@ -817,7 +817,7 @@ func ConvertTwitchLiveChat(ctx context.Context, input dto.ArchiveVideoInput) err previousVideoID = "132195945" } - err = utils.ConvertTwitchLiveChatToVodChat(input.Vod.TmpLiveChatDownloadPath, input.Channel.Name, input.Vod.ID.String(), input.Vod.ExtID, cID, input.Queue.ChatStart, string(previousVideoID)) + err = utils.ConvertTwitchLiveChatToTDLChat(input.Vod.TmpLiveChatDownloadPath, input.Channel.Name, input.Vod.ID.String(), input.Vod.ExtID, cID, input.Queue.ChatStart, string(previousVideoID)) if err != nil { log.Error().Err(err).Msg("error converting chat") _, dbErr := database.DB().Client.Queue.UpdateOneID(input.Queue.ID).SetTaskChatConvert(utils.Failed).Save(ctx) diff --git a/internal/transport/http/archive.go b/internal/transport/http/archive.go index 0c1f992c..aea69c49 100644 --- a/internal/transport/http/archive.go +++ b/internal/transport/http/archive.go @@ -4,7 +4,6 @@ import ( "net/http" "time" - "github.com/google/uuid" "github.com/labstack/echo/v4" "github.com/zibbp/ganymede/ent" "github.com/zibbp/ganymede/internal/archive" @@ -82,41 +81,6 @@ func (h *Handler) ArchiveTwitchVod(c echo.Context) error { return c.JSON(http.StatusOK, vod) } -// RestartTask godoc -// -// @Summary Restart a task -// @Description Restart a task -// @Tags archive -// @Accept json -// @Produce json -// @Param queue_id path string true "Queue ID" -// @Param task body RestartTaskRequest true "Task" -// @Success 200 -// @Failure 400 {object} utils.ErrorResponse -// @Failure 500 {object} utils.ErrorResponse -// @Router /archive/restart [post] -// @Security ApiKeyCookieAuth -func (h *Handler) RestartTask(c echo.Context) error { - rtr := new(RestartTaskRequest) - if err := c.Bind(rtr); err != nil { - return echo.NewHTTPError(http.StatusBadRequest, err.Error()) - } - if err := c.Validate(rtr); err != nil { - return echo.NewHTTPError(http.StatusBadRequest, err.Error()) - } - qUUID, err := 
uuid.Parse(rtr.QueueID) - if err != nil { - return echo.NewHTTPError(http.StatusBadRequest, err.Error()) - } - - err = h.Service.ArchiveService.RestartTask(c, qUUID, rtr.Task, rtr.Cont) - if err != nil { - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) - } - - return c.NoContent(http.StatusOK) -} - // debug route to test converting chat files func (h *Handler) ConvertTwitchChat(c echo.Context) error { type Body struct { diff --git a/internal/transport/http/handler.go b/internal/transport/http/handler.go index eb008259..e6f8f1d5 100644 --- a/internal/transport/http/handler.go +++ b/internal/transport/http/handler.go @@ -205,7 +205,6 @@ func groupV1Routes(e *echo.Group, h *Handler) { archiveGroup := e.Group("/archive") archiveGroup.POST("/channel", h.ArchiveTwitchChannel, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) archiveGroup.POST("/vod", h.ArchiveTwitchVod, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) - archiveGroup.POST("/restart", h.RestartTask, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.ArchiverRole)) archiveGroup.POST("/convert-twitch-live-chat", h.ConvertTwitchChat, auth.GuardMiddleware, auth.GetUserMiddleware, auth.UserRoleMiddleware(utils.AdminRole)) // Admin From 48a047dda8097d2e094d765d2328fdc477c8b9e1 Mon Sep 17 00:00:00 2001 From: Zibbp Date: Tue, 28 May 2024 03:00:26 +0000 Subject: [PATCH 4/4] feat(chat): better handling of unicode emojis --- internal/transport/http/archive.go | 26 +++++++++++++++++++------- internal/utils/chat.go | 26 +++++++++++++++++++++++--- internal/utils/tdl.go | 4 ++++ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/internal/transport/http/archive.go b/internal/transport/http/archive.go index aea69c49..ef616a07 100644 --- a/internal/transport/http/archive.go +++ b/internal/transport/http/archive.go @@ -2,6 +2,7 @@ package http import ( "net/http" + "strconv" "time" 
"github.com/labstack/echo/v4" @@ -84,19 +85,30 @@ func (h *Handler) ArchiveTwitchVod(c echo.Context) error { // debug route to test converting chat files func (h *Handler) ConvertTwitchChat(c echo.Context) error { type Body struct { - LiveChatPath string `json:"live_chat_path"` - ChannelName string `json:"channel_name"` - VideoID string `json:"video_id"` - VideoExternalID string `json:"video_external_id"` - ChannelID int `json:"channel_id"` - PreviousVideoID string `json:"previous_video_id"` + LiveChatPath string `json:"live_chat_path"` + ChannelName string `json:"channel_name"` + VideoID string `json:"video_id"` + VideoExternalID string `json:"video_external_id"` + ChannelID int `json:"channel_id"` + PreviousVideoID string `json:"previous_video_id"` + FirstMessageEpoch string `json:"first_message_epoch"` } body := new(Body) if err := c.Bind(body); err != nil { return echo.NewHTTPError(http.StatusBadRequest, err.Error()) } - err := utils.ConvertTwitchLiveChatToTDLChat(body.LiveChatPath, body.ChannelName, body.VideoID, body.VideoExternalID, body.ChannelID, time.Now(), body.PreviousVideoID) + epoch, err := strconv.Atoi(body.FirstMessageEpoch) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, err.Error()) + } + epochMicroseconds := int64(epoch) + seconds := epochMicroseconds / 1_000_000 + nanoseconds := (epochMicroseconds % 1_000_000) * 1_000 + + t := time.Unix(seconds, nanoseconds) + + err = utils.ConvertTwitchLiveChatToTDLChat(body.LiveChatPath, body.ChannelName, body.VideoID, body.VideoExternalID, body.ChannelID, t, body.PreviousVideoID) if err != nil { return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } diff --git a/internal/utils/chat.go b/internal/utils/chat.go index 21afccbd..3e031797 100644 --- a/internal/utils/chat.go +++ b/internal/utils/chat.go @@ -1,13 +1,23 @@ package utils -import "regexp" +import ( + "regexp" + "unicode" +) // find the substring position in a string. 
// findSubstringPositions returns the byte offsets [start, end) of the Nth
// (1-based) occurrence of substring within input.
//
// A purely alphanumeric substring (see isAlphanumeric) is matched as a whole
// word, so "Kappa" is not found inside "KappaPride". Any other substring
// (emoji, punctuation, text containing spaces) is matched exactly wherever it
// appears.
//
// It returns (-1, -1, false) when substring is empty, when occurrenceNumber is
// less than 1, when input holds fewer than occurrenceNumber matches, or when
// the quoted pattern cannot be compiled.
func findSubstringPositions(input string, substring string, occurrenceNumber int) (start int, end int, found bool) {
	// An empty needle has no meaningful position, and occurrences are 1-based.
	if substring == "" || occurrenceNumber < 1 {
		return -1, -1, false
	}

	pattern := regexp.QuoteMeta(substring)
	if isAlphanumeric(substring) {
		// RE2's \b is an ASCII word boundary: it can only hold next to a
		// character from [0-9A-Za-z_]. isAlphanumeric, however, accepts any
		// Unicode letter or digit, so wrapping e.g. a Cyrillic word in \b
		// would make it unmatchable as a standalone word. Anchor only the
		// edges where \b is meaningful; the other edge falls back to an exact
		// match.
		if isASCIIWordByte(substring[0]) {
			pattern = `\b` + pattern
		}
		if isASCIIWordByte(substring[len(substring)-1]) {
			pattern += `\b`
		}
	}

	// Compile rather than MustCompile: the pattern is built from runtime data
	// and a malformed value must not panic the caller.
	re, err := regexp.Compile(pattern)
	if err != nil {
		return -1, -1, false
	}

	// Only the first occurrenceNumber matches are needed to answer the query.
	matches := re.FindAllStringIndex(input, occurrenceNumber)
	if len(matches) < occurrenceNumber {
		return -1, -1, false
	}

	match := matches[occurrenceNumber-1]
	return match[0], match[1], true
}

// isAlphanumeric reports whether str consists solely of Unicode letters and
// digits. The empty string contains no other characters, so it reports true.
func isAlphanumeric(str string) bool {
	for _, char := range str {
		if !unicode.IsLetter(char) && !unicode.IsDigit(char) {
			return false
		}
	}
	return true
}

// isASCIIWordByte reports whether b belongs to RE2's \w class, [0-9A-Za-z_].
func isASCIIWordByte(b byte) bool {
	switch {
	case b == '_':
		return true
	case '0' <= b && b <= '9':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case 'a' <= b && b <= 'z':
		return true
	}
	return false
}

// NOTE(review): the rest of this patch line is a hunk for internal/utils/tdl.go
// whose enclosing function is not fully visible here; it is preserved verbatim
// below and was not reviewed.
//
// diff --git a/internal/utils/tdl.go b/internal/utils/tdl.go index f6882912..e9d00c3b 100644 --- a/internal/utils/tdl.go +++ b/internal/utils/tdl.go @@ -247,6 +247,10 @@ func ConvertTwitchLiveChatToTDLChat(path string, channelName string, videoID str formattedEmoteFragments = append(formattedEmoteFragments, fragment) formattedEmoteFragments = append(formattedEmoteFragments, emoteFragment) } else { + if emoteFragment.Pos1 == 0 { + log.Warn().Str("message_id", liveComment.MessageID).Msg("skipping invalid emote position") + continue + } fragmentText := tdlComment.Message.Body[emoteFragments[i-1].Pos2:emoteFragment.Pos1] fragment := Fragment{ Text: fragmentText,