From b19c20472c3d6d6bb53d2fe7cda191fb261f823f Mon Sep 17 00:00:00 2001 From: Mike Brown Date: Fri, 26 May 2023 20:54:54 +0000 Subject: [PATCH 1/3] Add Scanner option to expand variations (part 1 of 2) This commit is part of a series to add an optional ability to expand variations when utilizing Scanner to iterate over a .pgn file. This commit specifically introduces ScannerOpts to enable consumers of Scanner to specify different scanning behavior. The first intended use is ScannerOpts.ExpandVariations. Currently Scanner will strip out any variations that are present in a game defined within a .pgn file. This new option (non-default) when specified will instead allow Scanner consumers to specify that when iterating over a .pgn it should create a new instance of a chess game for every variation encountered. Additionally this commit refactors Scanner internals to deal with the fact that decoding a single game may return a plural number of Game instances rather than a singular. Code to actually make this plural will be in a followup commit. (cherry picked from commit 42e2e1674b876b6c1a5a420b0f37daab43f59126) --- pgn.go | 131 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 99 insertions(+), 32 deletions(-) diff --git a/pgn.go b/pgn.go index 416eef3..1ec022b 100644 --- a/pgn.go +++ b/pgn.go @@ -13,16 +13,28 @@ import ( // from concatenated PGN files. It is designed to // replace GamesFromPGN in order to handle very large // PGN database files such as https://database.lichess.org/. +type ScannerOpts struct { + ExpandVariations bool +} type Scanner struct { scanr *bufio.Scanner - game *Game + games []*Game err error + opts ScannerOpts } -// NewScanner returns a new scanner. 
+// NewScanner returns a new scanner with default options func NewScanner(r io.Reader) *Scanner { + defaultOpts := ScannerOpts{ExpandVariations: false} + + return NewScannerWithOptions(r, defaultOpts) +} + +// NewScannerWithOptions returns a new scanner with explicit options +func NewScannerWithOptions(r io.Reader, o ScannerOpts) *Scanner { scanr := bufio.NewScanner(r) - return &Scanner{scanr: scanr} + g := make([]*Game, 0) + return &Scanner{scanr: scanr, opts: o, games: g} } type scanState int @@ -43,15 +55,18 @@ func (s *Scanner) Scan() bool { return false } s.err = nil + if len(s.games) > 0 { + return true + } var sb strings.Builder state := notInPGN - setGame := func() bool { - game, err := decodePGN(sb.String()) + setGames := func() bool { + games, err := decodePGNs(sb.String(), s.opts.ExpandVariations) if err != nil { s.err = err return false } - s.game = game + s.games = games return true } for { @@ -62,7 +77,7 @@ func (s *Scanner) Scan() bool { if s.err == nil { s.err = io.EOF } - return setGame() + return setGames() } line := strings.TrimSpace(s.scanr.Text()) isTagPair := strings.HasPrefix(line, "[") @@ -81,7 +96,7 @@ func (s *Scanner) Scan() bool { sb.WriteString(line + "\n") case inMoves: if line == "" { - return setGame() + return setGames() } sb.WriteString(line + "\n") } @@ -90,7 +105,14 @@ func (s *Scanner) Scan() bool { // Next returns the game from the most recent Scan. func (s *Scanner) Next() *Game { - return s.game + if len(s.games) == 0 { + return nil + } + + g := s.games[0] + s.games = s.games[1:] + + return g } // Err returns an error encountered during scanning. 
@@ -151,8 +173,21 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) { } func decodePGN(pgn string) (*Game, error) { + gameList, err := decodePGNs(pgn, false) + if err != nil { + return nil, err + } + if len(gameList) != 1 { + return nil, fmt.Errorf("chess: pgn decode error unexpected game count %v", len(gameList)) + } + + return gameList[0], nil +} + +func decodePGNs(pgn string, expandVariations bool) ([]*Game, error) { + ret := []*Game{} tagPairs := getTagPairs(pgn) - moveComments, outcome, err := moveListWithComments(pgn) + moveListSet, err := moveListSetWithComments(pgn, expandVariations) if err != nil { return nil, err } @@ -169,23 +204,28 @@ func decodePGN(pgn string) (*Game, error) { } } gameFuncs = append(gameFuncs, TagPairs(tagPairs)) - g := NewGame(gameFuncs...) - g.ignoreAutomaticDraws = true - decoder := multiDecoder([]Decoder{AlgebraicNotation{}, LongAlgebraicNotation{}, UCINotation{}}) - for _, move := range moveComments { - m, err := decoder.Decode(g.Position(), move.MoveStr) - if err != nil { - return nil, fmt.Errorf("chess: pgn decode error %s on move %d", err.Error(), g.Position().moveCount) - } - if err := g.Move(m); err != nil { - return nil, fmt.Errorf("chess: pgn invalid move error %s on move %d", err.Error(), g.Position().moveCount) + + for idx, ml := range moveListSet.moveLists { + g := NewGame(gameFuncs...) 
+ g.ignoreAutomaticDraws = true + decoder := multiDecoder([]Decoder{AlgebraicNotation{}, LongAlgebraicNotation{}, UCINotation{}}) + for _, move := range ml.moves { + m, err := decoder.Decode(g.Position(), move.MoveStr) + if err != nil { + return nil, fmt.Errorf("chess: pgn decode error %s on variation %d move %d", err.Error(), idx, g.Position().moveCount) + } + if err := g.Move(m); err != nil { + return nil, fmt.Errorf("chess: pgn invalid move error %s on variation %d move %d", err.Error(), idx, g.Position().moveCount) + } + g.comments = g.comments[:len(g.comments)-1] + g.comments = append(g.comments, move.Comments) } - g.comments = g.comments[:len(g.comments)-1] - g.comments = append(g.comments, move.Comments) + g.outcome = ml.outcome + + ret = append(ret, g) } - g.outcome = outcome - return g, nil + return ret, nil } func encodePGN(g *Game) string { @@ -237,16 +277,43 @@ type moveWithComment struct { Comments []string } +type moveListAndOutcome struct { + moves []moveWithComment + outcome Outcome +} + +type moveListSet struct { + moveLists []moveListAndOutcome +} + var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=[QRBN])?(?:\+|#)?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`) -func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) { +func moveListSetWithComments(pgn string, expandVariations bool) (moveListSet, error) { + ret := moveListSet{ + moveLists: []moveListAndOutcome{}, + } + + if !expandVariations { + ml, err := moveListWithCommentsNoExpand(pgn) + if err != nil { + return ret, err + } + ret.moveLists = append(ret.moveLists, ml) + return ret, nil + } + + return ret, fmt.Errorf("unimplemented") +} + +func moveListWithCommentsNoExpand(pgn string) (moveListAndOutcome, error) { pgn = stripTagPairs(pgn) - var outcome Outcome - moves := []moveWithComment{} + ret := moveListAndOutcome{ + moves: []moveWithComment{}, + } // moveListTokenRe doesn't work w/ nested variations pgn, err := 
stripVariations(pgn) if err != nil { - return moves, outcome, err + return ret, err } for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) { @@ -256,19 +323,19 @@ func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) { } if outcomeText != "" { - outcome = Outcome(outcomeText) + ret.outcome = Outcome(outcomeText) break } if commentText != "" { - moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.TrimSpace(commentText)) + ret.moves[len(ret.moves)-1].Comments = append(ret.moves[len(ret.moves)-1].Comments, strings.TrimSpace(commentText)) } if move != "" { - moves = append(moves, moveWithComment{MoveStr: move}) + ret.moves = append(ret.moves, moveWithComment{MoveStr: move}) } } - return moves, outcome, nil + return ret, nil } func stripTagPairs(pgn string) string { From d48f73c494dcd1a5e0e07b1b0be1d7f257d76e44 Mon Sep 17 00:00:00 2001 From: Mike Brown Date: Fri, 26 May 2023 22:14:52 +0000 Subject: [PATCH 2/3] Add Scanner option to expand variations (part 2 of 2) This commit is part of a series to add an optional ability to expand variations when utilizing Scanner to iterate over a .pgn file. This commit specifically implements moveListSetWithComments() when expandVariations==true. Additionally it adds a unit test to verify Scanner behaves as expected when ExpandVariations==true. 
(cherry picked from commit 666f43f23e15c9c9a22be1ee3477719ea9829bb2) --- pgn.go | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++- pgn_test.go | 37 +++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) diff --git a/pgn.go b/pgn.go index 1ec022b..35af3c9 100644 --- a/pgn.go +++ b/pgn.go @@ -302,7 +302,7 @@ func moveListSetWithComments(pgn string, expandVariations bool) (moveListSet, er return ret, nil } - return ret, fmt.Errorf("unimplemented") + return moveListSetExpanded(pgn) } func moveListWithCommentsNoExpand(pgn string) (moveListAndOutcome, error) { @@ -338,6 +338,66 @@ func moveListWithCommentsNoExpand(pgn string) (moveListAndOutcome, error) { return ret, nil } +var moveNumRe = regexp.MustCompile(`(?:\d+\.+)?(.*)`) + +func moveListSetExpanded(pgn string) (moveListSet, error) { + firstGame := moveListAndOutcome{ + moves: []moveWithComment{}, + } + ret := moveListSet{ + moveLists: []moveListAndOutcome{firstGame}, + } + + pgn = stripTagPairs(pgn) + // remove comments @todo need to add comments back in + pgn = removeSection("{", "}", pgn) + // remove line breaks + pgn = strings.Replace(pgn, "\n", " ", -1) + pgn = strings.ReplaceAll(pgn, "(", "( ") + pgn = strings.ReplaceAll(pgn, ")", " )") + + moveListIdx := 0 + moveListIdxStack := make([]int, 0) + list := strings.Split(pgn, " ") + + for _, move := range list { + move = strings.TrimSpace(move) + switch move { + case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw): + ret.moveLists[moveListIdx].outcome = Outcome(move) + case "": + case "(": + // begin new variation + moveListIdxStack = append(moveListIdxStack, moveListIdx) + newIdx := len(ret.moveLists) + numMoves := len(ret.moveLists[moveListIdx].moves) - 1 + newGame := moveListAndOutcome{} + newGame.moves = make([]moveWithComment, numMoves) + copy(newGame.moves, ret.moveLists[moveListIdx].moves) + ret.moveLists = append(ret.moveLists, newGame) + moveListIdx = newIdx + + case ")": + // end current variation + 
stackSize := len(moveListIdxStack) + if stackSize == 0 { + return ret, fmt.Errorf("Failed to parse variation") + } + moveListIdx = moveListIdxStack[stackSize-1] + moveListIdxStack = moveListIdxStack[:stackSize-1] + default: + results := moveNumRe.FindStringSubmatch(move) + tmp := moveWithComment{} + if len(results) == 2 && results[1] != "" { + tmp.MoveStr = results[1] + ret.moveLists[moveListIdx].moves = append(ret.moveLists[moveListIdx].moves, tmp) + } + } + } + + return ret, nil +} + func stripTagPairs(pgn string) string { lines := strings.Split(pgn, "\n") cp := []string{} @@ -389,3 +449,14 @@ func stripVariations(pgn string) (string, error) { return ret.String(), nil } + +func removeSection(leftChar, rightChar, s string) string { + r := regexp.MustCompile(leftChar + ".*?" + rightChar) + for { + i := r.FindStringIndex(s) + if i == nil { + return s + } + s = s[0:i[0]] + s[i[1]:] + } +} diff --git a/pgn_test.go b/pgn_test.go index 36d3232..9632a9e 100644 --- a/pgn_test.go +++ b/pgn_test.go @@ -1,6 +1,8 @@ package chess import ( + "fmt" + "io" "io/ioutil" "os" "strings" @@ -201,6 +203,41 @@ func TestScannerWithFromPosFENs(t *testing.T) { } } +func TestScannerWithNestedAndExpand(t *testing.T) { + fname := "fixtures/pgns/0013.pgn" + f, err := os.Open(fname) + if err != nil { + panic(err) + } + defer f.Close() + + scannerOpts := ScannerOpts{ExpandVariations: true} + scanner := NewScannerWithOptions(f, scannerOpts) + games := []*Game{} + for scanner.Scan() { + err = scanner.Err() + if err != nil && err != io.EOF { + t.Fatalf(fname+" Unexpected non-nil/non-EOF err %v", err) + } + game := scanner.Next() + moveList := game.Moves() + if len(moveList) == 0 { + continue + } + games = append(games, game) + } + err = scanner.Err() + if err != io.EOF { + t.Fatalf(fname+" Unexpected non-EOF err %v", err) + } + if len(games) != 10 { + for idx, g := range games { + fmt.Printf("Parsed game %v: %v\n\n", idx, g) + } + t.Fatalf(fname+" expected 10 games but got %d", len(games)) + } +} 
+ func BenchmarkPGN(b *testing.B) { pgn := mustParsePGN("fixtures/pgns/0001.pgn") b.ResetTimer() From 54ebdbfc73b977514028a4eda1fbd6989d79516c Mon Sep 17 00:00:00 2001 From: Mike Brown Date: Tue, 26 Nov 2024 14:16:45 -0500 Subject: [PATCH 3/3] Scanner: Add test case combining from position & expand variations This commit just adds a new pgn test case that ensures the combination of a PGN with a specified starting position and the ExpandVariations scanner option interoperate. --- pgn_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/pgn_test.go b/pgn_test.go index 9632a9e..d4361ee 100644 --- a/pgn_test.go +++ b/pgn_test.go @@ -203,6 +203,45 @@ func TestScannerWithFromPosFENs(t *testing.T) { } } +func TestScannerWithFromPosFENsExpanded(t *testing.T) { + finalPositions := []string{ + "rnbqkbnr/pp2pppp/2p5/3p4/3PP3/5P2/PPP3PP/RNBQKBNR b KQkq - 0 3", + "r2qkb1r/pp1n1ppp/2p2n2/4p3/2BPP1b1/2P2N2/PP4PP/RNBQ1RK1 b kq - 0 8", + "rnbqkbnr/pp3ppp/2p5/8/2BpP3/5N2/PPP3PP/RNBQK2R b KQkq - 1 6", + "rnbqk2r/pp2nppp/2p1p3/3p4/1b1PP3/2NB1P2/PPPB2PP/R2QK1NR b KQkq - 5 6", + "rnbqkb1r/pp3ppp/2p1pn2/3pP3/3P4/2N2P2/PPP3PP/R1BQKBNR b KQkq - 0 5", + "rnbqk1nr/pp2ppbp/2p3p1/3p4/3PP3/2N1BP2/PPP3PP/R2QKBNR b KQkq - 3 5", + "rnb1kbnr/pp3ppp/1qp5/8/3NP3/2N5/PPP3PP/R1BQKB1R b KQkq - 0 7", + } + fname := "fixtures/pgns/0014.pgn" + f, err := os.Open(fname) + if err != nil { + panic(err) + } + defer f.Close() + + scannerOpts := ScannerOpts{ExpandVariations: true} + scanner := NewScannerWithOptions(f, scannerOpts) + games := []*Game{} + for idx := 0; scanner.Scan(); { + game := scanner.Next() + if len(game.moves) == 0 { + continue + } + finalPos := game.Position().String() + if finalPos != finalPositions[idx] { + t.Fatalf(fname+" game %v expected final pos %v but got %v", idx, + finalPositions[idx], finalPos) + } + games = append(games, game) + idx++ + } + if len(games) != len(finalPositions) { + t.Fatalf(fname+" expected %v games but got %v", 
len(finalPositions), + len(games)) + } +} + func TestScannerWithNestedAndExpand(t *testing.T) { fname := "fixtures/pgns/0013.pgn" f, err := os.Open(fname)