Skip to content

Commit

Permalink
Move Dockerfile parsing to a dedicated package
Browse files Browse the repository at this point in the history
Also, add a bunch of test cases / code coverage
  • Loading branch information
tianon committed Jan 9, 2025
1 parent 98f6610 commit b8fae83
Show file tree
Hide file tree
Showing 4 changed files with 262 additions and 120 deletions.
119 changes: 7 additions & 112 deletions cmd/bashbrew/docker.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package main

import (
"bufio"
"bytes"
"crypto/sha256"
"encoding/hex"
Expand All @@ -10,21 +9,13 @@ import (
"os"
"os/exec"
"path"
"strconv"
"strings"

"github.com/docker-library/bashbrew/manifest"
"github.com/docker-library/bashbrew/pkg/dockerfile"
"github.com/urfave/cli"
)

// dockerfileMetadata holds the image references collected from a Dockerfile by parseDockerfileMetadata.
type dockerfileMetadata struct {
StageFroms []string // the image of every "FROM" instruction, in order (a "FROM" that names an earlier stage is recorded as that stage's own FROM value)
StageNames []string // the names given to stages via "FROM xxx AS name", in order of appearance
StageNameFroms map[string]string // stage name -> FROM value of that stage (already resolved as described for StageFroms); used to resolve stage references back to images

Froms []string // every "FROM" and "COPY --from=xxx" value, in order, with references to named or numbered stages resolved to that stage's FROM value
}

// this returns the "FROM" value for the last stage (which essentially determines the "base" for the final published image)
func (r Repo) ArchLastStageFrom(arch string, entry *manifest.Manifest2822Entry) (string, error) {
dockerfileMeta, err := r.archDockerfileMetadata(arch, entry)
Expand All @@ -46,15 +37,15 @@ func (r Repo) ArchDockerFroms(arch string, entry *manifest.Manifest2822Entry) ([
return dockerfileMeta.Froms, nil
}

func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) {
func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (*dockerfile.Metadata, error) {
return r.archDockerfileMetadata(arch, entry)
}

var dockerfileMetadataCache = map[string]*dockerfileMetadata{}
var dockerfileMetadataCache = map[string]*dockerfile.Metadata{}

func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) {
func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (*dockerfile.Metadata, error) {
if builder := entry.ArchBuilder(arch); builder == "oci-import" {
return &dockerfileMetadata{
return &dockerfile.Metadata{
StageFroms: []string{
"scratch",
},
Expand All @@ -79,12 +70,12 @@ func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822En
return meta, nil
}

dockerfile, err := gitShow(commit, dockerfileFile)
df, err := gitShow(commit, dockerfileFile)
if err != nil {
return nil, cli.NewMultiError(fmt.Errorf(`failed "git show" for %q from commit %q`, dockerfileFile, commit), err)
}

meta, err := parseDockerfileMetadata(dockerfile)
meta, err := dockerfile.Parse(df)
if err != nil {
return nil, cli.NewMultiError(fmt.Errorf(`failed parsing Dockerfile metadata for %q from commit %q`, dockerfileFile, commit), err)
}
Expand All @@ -93,102 +84,6 @@ func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822En
return meta, nil
}

// parseDockerfileMetadata scans Dockerfile content and records the images it
// references: the base of every stage ("FROM") plus every "COPY --from=xxx"
// source.
//
// References to earlier stages (by name, or by number in "COPY --from=N") are
// resolved back to the FROM value of that stage, and every value is passed
// through latestizeRepoTag so an implied ":latest" is made explicit.
//
// Parsing is intentionally shallow: only FROM and COPY are inspected, a
// trailing backslash is always treated as the line continuation character,
// and ARG / variable references are not expanded.
//
// An error is returned if scanning fails or if a FROM instruction has no
// image argument.
func parseDockerfileMetadata(dockerfile string) (*dockerfileMetadata, error) {
	meta := &dockerfileMetadata{
		// the map has to be allocated up front (assigning to an entry in a nil map panics); the slices can stay nil because append handles that
		StageNameFroms: map[string]string{},
	}

	scanner := bufio.NewScanner(strings.NewReader(dockerfile))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		if line == "" {
			// ignore blank lines
			continue
		}

		if line[0] == '#' {
			// TODO handle "escape" parser directive
			// TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing)
			// ignore comments
			continue
		}

		// handle line continuations
		// (TODO honor the "escape" parser directive instead of assuming backslash)
		for line[len(line)-1] == '\\' && scanner.Scan() {
			nextLine := strings.TrimSpace(scanner.Text())
			if nextLine == "" || nextLine[0] == '#' {
				// blank lines and comments inside a continued instruction are skipped
				continue
			}
			line = line[0:len(line)-1] + nextLine
		}

		fields := strings.Fields(line)
		if len(fields) < 1 {
			// must be a much more complex empty line??
			continue
		}
		instruction := strings.ToUpper(fields[0])

		// TODO balk at ARG / $ in from values

		switch instruction {
		case "FROM":
			// skip leading flags (such as "--platform=xxx") so they are not mistaken for the image reference
			args := fields[1:]
			for len(args) > 0 && strings.HasPrefix(args[0], "--") {
				args = args[1:]
			}
			if len(args) < 1 {
				// report malformed input as an error instead of panicking on an out-of-range index
				return nil, fmt.Errorf("FROM instruction is missing an image reference: %q", line)
			}
			from := args[0]

			if stageFrom, ok := meta.StageNameFroms[from]; ok {
				// a stage name is resolved back to the original FROM value of that earlier stage (inter-stage dependencies do not matter here -- only which external images are required)
				from = stageFrom
			}

			// make sure to add ":latest" if it's implied
			from = latestizeRepoTag(from)

			meta.StageFroms = append(meta.StageFroms, from)
			meta.Froms = append(meta.Froms, from)

			if len(args) == 3 && strings.ToUpper(args[1]) == "AS" {
				stageName := args[2]
				meta.StageNames = append(meta.StageNames, stageName)
				meta.StageNameFroms[stageName] = from
			}
		case "COPY":
			for _, arg := range fields[1:] {
				if !strings.HasPrefix(arg, "--") {
					// doesn't appear to be a "flag"; time to bail!
					break
				}
				if !strings.HasPrefix(arg, "--from=") {
					// ignore any flags we're not interested in
					continue
				}
				from := arg[len("--from="):]

				if stageFrom, ok := meta.StageNameFroms[from]; ok {
					// stage names resolve to the FROM value of the stage they name
					from = stageFrom
				} else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber >= 0 && stageNumber < len(meta.StageFroms) {
					// a valid stage number resolves the same way (negative numbers are rejected here so they cannot index out of range)
					from = meta.StageFroms[stageNumber]
				}

				// make sure to add ":latest" if it's implied
				from = latestizeRepoTag(from)

				meta.Froms = append(meta.Froms, from)
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return meta, nil
}

func (r Repo) DockerCacheName(entry *manifest.Manifest2822Entry) (string, error) {
cacheHash, err := r.dockerCacheHash(entry)
if err != nil {
Expand Down
8 changes: 0 additions & 8 deletions cmd/bashbrew/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"path"
"path/filepath"
"sort"
"strings"

"github.com/docker-library/bashbrew/manifest"
)
Expand Down Expand Up @@ -39,13 +38,6 @@ func repos(all bool, args ...string) ([]string, error) {
return ret, nil
}

// latestizeRepoTag appends ":latest" to an image reference that carries
// neither an explicit tag nor a digest; "scratch" is returned untouched.
//
// Only a colon after the final "/" is treated as a tag/digest separator, so
// the port of a registry host ("localhost:5000/foo") is not mistaken for a tag.
func latestizeRepoTag(repoTag string) string {
	if repoTag == "scratch" {
		return repoTag
	}
	// a tag (":tag") or digest ("@algo:hex") can only appear in the final path component
	lastComponent := repoTag[strings.LastIndexByte(repoTag, '/')+1:]
	if strings.ContainsRune(lastComponent, ':') {
		return repoTag
	}
	return repoTag + ":latest"
}

type Repo struct {
RepoName string
TagName string
Expand Down
124 changes: 124 additions & 0 deletions pkg/dockerfile/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package dockerfile

import (
"bufio"
"fmt"
"io"
"strconv"
"strings"
)

// Metadata holds the image references collected from a Dockerfile by Parse / ParseReader.
type Metadata struct {
StageFroms []string // the image of every "FROM" instruction, in order (a "FROM" that names an earlier stage is recorded as that stage's own FROM value)
StageNames []string // the names given to stages via "FROM xxx AS name", in order of appearance
StageNameFroms map[string]string // stage name -> FROM value of that stage (already resolved as described for StageFroms); used to resolve stage references back to images

Froms []string // every "FROM" and "COPY --from=xxx" value, in order, with references to named or numbered stages resolved to that stage's FROM value
}

// Parse extracts Metadata from Dockerfile content held in a string.
//
// It is a convenience wrapper that hands the content to ParseReader and
// returns whatever ParseReader returns.
func Parse(dockerfile string) (*Metadata, error) {
	reader := strings.NewReader(dockerfile)
	return ParseReader(reader)
}

// ParseReader scans Dockerfile content from the given reader and records the
// images it references: the base of every stage ("FROM") plus every
// "COPY --from=xxx" source.
//
// References to earlier stages (by name, or by number in "COPY --from=N") are
// resolved back to the FROM value of that stage, and every value is passed
// through latestizeRepoTag so an implied ":latest" is made explicit.
//
// Parsing is intentionally shallow: only FROM and COPY are inspected, a
// trailing backslash is always treated as the line continuation character,
// and ARG / variable references are not expanded.
//
// An error is returned if reading fails or if a FROM instruction has no image
// argument.
func ParseReader(dockerfile io.Reader) (*Metadata, error) {
	meta := &Metadata{
		// the map has to be allocated up front (assigning to an entry in a nil map panics); the slices can stay nil because append handles that
		StageNameFroms: map[string]string{},
	}

	scanner := bufio.NewScanner(dockerfile)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		if line == "" {
			// ignore blank lines
			continue
		}

		if line[0] == '#' {
			// TODO handle "escape" parser directive
			// TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing)
			// ignore comments
			continue
		}

		// handle line continuations
		// (TODO honor the "escape" parser directive instead of assuming backslash)
		for line[len(line)-1] == '\\' && scanner.Scan() {
			nextLine := strings.TrimSpace(scanner.Text())
			if nextLine == "" || nextLine[0] == '#' {
				// blank lines and comments inside a continued instruction are skipped
				continue
			}
			line = line[0:len(line)-1] + nextLine
		}

		fields := strings.Fields(line)
		if len(fields) < 1 {
			// must be a much more complex empty line??
			continue
		}
		instruction := strings.ToUpper(fields[0])

		// TODO balk at ARG / $ in from values

		switch instruction {
		case "FROM":
			// skip leading flags (such as "--platform=xxx") so they are not mistaken for the image reference
			args := fields[1:]
			for len(args) > 0 && strings.HasPrefix(args[0], "--") {
				args = args[1:]
			}
			if len(args) < 1 {
				// report malformed input as an error instead of panicking on an out-of-range index
				return nil, fmt.Errorf("FROM instruction is missing an image reference: %q", line)
			}
			from := args[0]

			if stageFrom, ok := meta.StageNameFroms[from]; ok {
				// a stage name is resolved back to the original FROM value of that earlier stage (inter-stage dependencies do not matter here -- only which external images are required)
				from = stageFrom
			}

			// make sure to add ":latest" if it's implied
			from = latestizeRepoTag(from)

			meta.StageFroms = append(meta.StageFroms, from)
			meta.Froms = append(meta.Froms, from)

			if len(args) == 3 && strings.ToUpper(args[1]) == "AS" {
				stageName := args[2]
				meta.StageNames = append(meta.StageNames, stageName)
				meta.StageNameFroms[stageName] = from
			}

		case "COPY":
			for _, arg := range fields[1:] {
				if !strings.HasPrefix(arg, "--") {
					// doesn't appear to be a "flag"; time to bail!
					break
				}
				if !strings.HasPrefix(arg, "--from=") {
					// ignore any flags we're not interested in
					continue
				}
				from := arg[len("--from="):]

				if stageFrom, ok := meta.StageNameFroms[from]; ok {
					// stage names resolve to the FROM value of the stage they name
					from = stageFrom
				} else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber >= 0 && stageNumber < len(meta.StageFroms) {
					// a valid stage number resolves the same way (negative numbers are rejected here so they cannot index out of range)
					from = meta.StageFroms[stageNumber]
				}

				// make sure to add ":latest" if it's implied
				from = latestizeRepoTag(from)

				meta.Froms = append(meta.Froms, from)
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return meta, nil
}

// latestizeRepoTag appends ":latest" to an image reference that carries
// neither an explicit tag nor a digest; "scratch" is returned untouched.
//
// Only a colon after the final "/" is treated as a tag/digest separator, so
// the port of a registry host ("localhost:5000/foo") is not mistaken for a tag.
func latestizeRepoTag(repoTag string) string {
	if repoTag == "scratch" {
		return repoTag
	}
	// a tag (":tag") or digest ("@algo:hex") can only appear in the final path component
	lastComponent := repoTag[strings.LastIndexByte(repoTag, '/')+1:]
	if strings.ContainsRune(lastComponent, ':') {
		return repoTag
	}
	return repoTag + ":latest"
}
Loading

0 comments on commit b8fae83

Please sign in to comment.