Skip to content

Commit

Permalink
DB schema: upgrade most integers types to bigint
Browse files Browse the repository at this point in the history
This avoids data errors with large integers, which are starting to occur for the
ctags.id column. The only columns left as integer (not bigint) are things like the
line position in the ctags table and counters for generally small quantities, e.g.,
source/binary package counts.

This commit includes changes to both models.py and the SQL upgrade script.
  • Loading branch information
zacchiro committed Dec 20, 2015
1 parent 6dda0db commit fdf12a4
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 64 deletions.
89 changes: 89 additions & 0 deletions debsources/migrate/010-to-011.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- Schema migration 010 -> 011: widen most INTEGER columns to BIGINT.
--
-- In PostgreSQL, changing the type of a column rewrites the whole table and
-- its indexes.  All the changes for a given table are therefore grouped in a
-- single ALTER TABLE statement, so that each table is rewritten only once
-- (this matters most for ctags, which was previously altered twice).
--
-- Referenced tables (package_names, packages, files, binary_names) are
-- widened before the tables whose foreign keys point at them.

ALTER TABLE package_names ALTER COLUMN id TYPE BIGINT;

ALTER TABLE packages
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN name_id TYPE BIGINT;

ALTER TABLE suites
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT;

ALTER TABLE files
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT;

ALTER TABLE checksums
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT,
  ALTER COLUMN file_id TYPE BIGINT;

ALTER TABLE binary_names ALTER COLUMN id TYPE BIGINT;

ALTER TABLE binaries
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN name_id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT;

ALTER TABLE sloccounts
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT,
  ALTER COLUMN count TYPE BIGINT;

-- Single statement for ctags: id, package_id and file_id are converted in
-- one table rewrite.  The line column intentionally stays INTEGER.
ALTER TABLE ctags
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT,
  ALTER COLUMN file_id TYPE BIGINT;

ALTER TABLE metrics
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN package_id TYPE BIGINT,
  ALTER COLUMN value_ TYPE BIGINT;

-- Only the potentially large counters are widened here.
ALTER TABLE history_size
  ALTER COLUMN disk_usage TYPE BIGINT,
  ALTER COLUMN source_files TYPE BIGINT,
  ALTER COLUMN ctags TYPE BIGINT;

ALTER TABLE history_sloccount
  ALTER COLUMN lang_ada TYPE BIGINT,
  ALTER COLUMN lang_ansic TYPE BIGINT,
  ALTER COLUMN lang_asm TYPE BIGINT,
  ALTER COLUMN lang_awk TYPE BIGINT,
  ALTER COLUMN lang_cobol TYPE BIGINT,
  ALTER COLUMN lang_cpp TYPE BIGINT,
  ALTER COLUMN lang_cs TYPE BIGINT,
  ALTER COLUMN lang_csh TYPE BIGINT,
  ALTER COLUMN lang_erlang TYPE BIGINT,
  ALTER COLUMN lang_exp TYPE BIGINT,
  ALTER COLUMN lang_f90 TYPE BIGINT,
  ALTER COLUMN lang_fortran TYPE BIGINT,
  ALTER COLUMN lang_haskell TYPE BIGINT,
  ALTER COLUMN lang_java TYPE BIGINT,
  ALTER COLUMN lang_jsp TYPE BIGINT,
  ALTER COLUMN lang_lex TYPE BIGINT,
  ALTER COLUMN lang_lisp TYPE BIGINT,
  ALTER COLUMN lang_makefile TYPE BIGINT,
  ALTER COLUMN lang_ml TYPE BIGINT,
  ALTER COLUMN lang_modula3 TYPE BIGINT,
  ALTER COLUMN lang_objc TYPE BIGINT,
  ALTER COLUMN lang_pascal TYPE BIGINT,
  ALTER COLUMN lang_perl TYPE BIGINT,
  ALTER COLUMN lang_php TYPE BIGINT,
  ALTER COLUMN lang_python TYPE BIGINT,
  ALTER COLUMN lang_ruby TYPE BIGINT,
  ALTER COLUMN lang_sed TYPE BIGINT,
  ALTER COLUMN lang_sh TYPE BIGINT,
  ALTER COLUMN lang_sql TYPE BIGINT,
  ALTER COLUMN lang_tcl TYPE BIGINT,
  ALTER COLUMN lang_vhdl TYPE BIGINT,
  ALTER COLUMN lang_xml TYPE BIGINT,
  ALTER COLUMN lang_yacc TYPE BIGINT;

ALTER TABLE copyright
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN file_id TYPE BIGINT;

ALTER TABLE history_copyright
  ALTER COLUMN id TYPE BIGINT,
  ALTER COLUMN files TYPE BIGINT;
129 changes: 65 additions & 64 deletions debsources/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sqlalchemy import UniqueConstraint, PrimaryKeyConstraint
from sqlalchemy import Index
from sqlalchemy import Boolean, Date, DateTime, Integer, LargeBinary, String
from sqlalchemy.dialects.postgresql import BIGINT
from sqlalchemy import Enum
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
Expand All @@ -26,14 +27,14 @@


# used for migrations, see scripts under debsources/migrate/
DB_SCHEMA_VERSION = 10
DB_SCHEMA_VERSION = 11


class PackageName(Base):
""" a source package name """
__tablename__ = 'package_names'

id = Column(Integer, primary_key=True)
id = Column(BIGINT, primary_key=True)
name = Column(String, index=True, unique=True)
versions = relationship("Package", backref="name",
cascade="all, delete-orphan",
Expand All @@ -57,9 +58,9 @@ class Package(Base):
""" a (versioned) source package """
__tablename__ = 'packages'

id = Column(Integer, primary_key=True)
id = Column(BIGINT, primary_key=True)
version = Column(String, index=True)
name_id = Column(Integer,
name_id = Column(BIGINT,
ForeignKey('package_names.id', ondelete="CASCADE"),
index=True, nullable=False)
area = Column(String(8), index=True) # main, contrib, non-free
Expand Down Expand Up @@ -95,8 +96,8 @@ class Suite(Base):
__tablename__ = 'suites'
__table_args__ = (UniqueConstraint('package_id', 'suite'),)

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
suite = Column(String, index=True)
Expand Down Expand Up @@ -147,8 +148,8 @@ class File(Base):
__tablename__ = 'files'
__table_args__ = (UniqueConstraint('package_id', 'path'),)

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
path = Column(LargeBinary, index=True, # path/within/source/pkg
Expand All @@ -163,11 +164,11 @@ class Checksum(Base):
__tablename__ = 'checksums'
__table_args__ = (UniqueConstraint('package_id', 'file_id'),)

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
file_id = Column(Integer,
file_id = Column(BIGINT,
ForeignKey('files.id', ondelete="CASCADE"),
index=True, nullable=False)
sha256 = Column(String(64), nullable=False, index=True)
Expand All @@ -181,7 +182,7 @@ def __init__(self, version, file_id, sha256):
class BinaryName(Base):
__tablename__ = 'binary_names'

id = Column(Integer, primary_key=True)
id = Column(BIGINT, primary_key=True)
name = Column(String, index=True, unique=True)
versions = relationship("Binary", backref="name",
cascade="all, delete-orphan",
Expand All @@ -197,12 +198,12 @@ def __repr__(self):
class Binary(Base):
__tablename__ = 'binaries'

id = Column(Integer, primary_key=True)
id = Column(BIGINT, primary_key=True)
version = Column(String)
name_id = Column(Integer,
name_id = Column(BIGINT,
ForeignKey('binary_names.id', ondelete="CASCADE"),
index=True, nullable=False)
package_id = Column(Integer,
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)

Expand All @@ -217,14 +218,14 @@ class SlocCount(Base):
__tablename__ = 'sloccounts'
__table_args__ = (UniqueConstraint('package_id', 'language'),)

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
language = Column(Enum(*SLOCCOUNT_LANGUAGES, name="language_names"),
# TODO rename enum s/language_names/sloccount/languages
nullable=False)
count = Column(Integer, nullable=False)
count = Column(BIGINT, nullable=False)

def __init__(self, version, lang, locs):
self.package_id = version.id
Expand All @@ -235,12 +236,12 @@ def __init__(self, version, lang, locs):
class Ctag(Base):
__tablename__ = 'ctags'

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
tag = Column(String, nullable=False, index=True)
file_id = Column(Integer,
file_id = Column(BIGINT,
ForeignKey('files.id', ondelete="CASCADE"),
index=True, nullable=False)
line = Column(Integer, nullable=False)
Expand Down Expand Up @@ -278,12 +279,12 @@ class Metric(Base):
__tablename__ = 'metrics'
__table_args__ = (UniqueConstraint('package_id', 'metric'),)

id = Column(Integer, primary_key=True)
package_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
package_id = Column(BIGINT,
ForeignKey('packages.id', ondelete="CASCADE"),
index=True, nullable=False)
metric = Column(Enum(*METRIC_TYPES, name="metric_types"), nullable=False)
value = Column("value_", Integer, nullable=False)
value = Column("value_", BIGINT, nullable=False)

def __init__(self, version, metric, value):
self.package_id = version.id
Expand All @@ -305,10 +306,10 @@ class HistorySize(Base):
source_packages = Column(Integer, nullable=True)
binary_packages = Column(Integer, nullable=True)

disk_usage = Column(Integer, nullable=True)
source_files = Column(Integer, nullable=True)
disk_usage = Column(BIGINT, nullable=True)
source_files = Column(BIGINT, nullable=True)

ctags = Column(Integer, nullable=True)
ctags = Column(BIGINT, nullable=True)

def __init__(self, suite, timestamp):
self.suite = suite
Expand All @@ -327,39 +328,39 @@ class HistorySlocCount(Base):
index=True, nullable=False)

# see consts.SLOCCOUNT_LANGUAGES for the language list rationale
lang_ada = Column(Integer, nullable=True)
lang_ansic = Column(Integer, nullable=True)
lang_asm = Column(Integer, nullable=True)
lang_awk = Column(Integer, nullable=True)
lang_cobol = Column(Integer, nullable=True)
lang_cpp = Column(Integer, nullable=True)
lang_cs = Column(Integer, nullable=True)
lang_csh = Column(Integer, nullable=True)
lang_erlang = Column(Integer, nullable=True)
lang_exp = Column(Integer, nullable=True)
lang_f90 = Column(Integer, nullable=True)
lang_fortran = Column(Integer, nullable=True)
lang_haskell = Column(Integer, nullable=True)
lang_java = Column(Integer, nullable=True)
lang_jsp = Column(Integer, nullable=True)
lang_lex = Column(Integer, nullable=True)
lang_lisp = Column(Integer, nullable=True)
lang_makefile = Column(Integer, nullable=True)
lang_ml = Column(Integer, nullable=True)
lang_modula3 = Column(Integer, nullable=True)
lang_objc = Column(Integer, nullable=True)
lang_pascal = Column(Integer, nullable=True)
lang_perl = Column(Integer, nullable=True)
lang_php = Column(Integer, nullable=True)
lang_python = Column(Integer, nullable=True)
lang_ruby = Column(Integer, nullable=True)
lang_sed = Column(Integer, nullable=True)
lang_sh = Column(Integer, nullable=True)
lang_sql = Column(Integer, nullable=True)
lang_tcl = Column(Integer, nullable=True)
lang_vhdl = Column(Integer, nullable=True)
lang_xml = Column(Integer, nullable=True)
lang_yacc = Column(Integer, nullable=True)
lang_ada = Column(BIGINT, nullable=True)
lang_ansic = Column(BIGINT, nullable=True)
lang_asm = Column(BIGINT, nullable=True)
lang_awk = Column(BIGINT, nullable=True)
lang_cobol = Column(BIGINT, nullable=True)
lang_cpp = Column(BIGINT, nullable=True)
lang_cs = Column(BIGINT, nullable=True)
lang_csh = Column(BIGINT, nullable=True)
lang_erlang = Column(BIGINT, nullable=True)
lang_exp = Column(BIGINT, nullable=True)
lang_f90 = Column(BIGINT, nullable=True)
lang_fortran = Column(BIGINT, nullable=True)
lang_haskell = Column(BIGINT, nullable=True)
lang_java = Column(BIGINT, nullable=True)
lang_jsp = Column(BIGINT, nullable=True)
lang_lex = Column(BIGINT, nullable=True)
lang_lisp = Column(BIGINT, nullable=True)
lang_makefile = Column(BIGINT, nullable=True)
lang_ml = Column(BIGINT, nullable=True)
lang_modula3 = Column(BIGINT, nullable=True)
lang_objc = Column(BIGINT, nullable=True)
lang_pascal = Column(BIGINT, nullable=True)
lang_perl = Column(BIGINT, nullable=True)
lang_php = Column(BIGINT, nullable=True)
lang_python = Column(BIGINT, nullable=True)
lang_ruby = Column(BIGINT, nullable=True)
lang_sed = Column(BIGINT, nullable=True)
lang_sh = Column(BIGINT, nullable=True)
lang_sql = Column(BIGINT, nullable=True)
lang_tcl = Column(BIGINT, nullable=True)
lang_vhdl = Column(BIGINT, nullable=True)
lang_xml = Column(BIGINT, nullable=True)
lang_yacc = Column(BIGINT, nullable=True)

def __init__(self, suite, timestamp):
self.suite = suite
Expand All @@ -370,8 +371,8 @@ class FileCopyright(Base):

__tablename__ = 'copyright'

id = Column(Integer, primary_key=True)
file_id = Column(Integer,
id = Column(BIGINT, primary_key=True)
file_id = Column(BIGINT,
ForeignKey('files.id', ondelete="CASCADE"),
index=True, nullable=False)
oracle = Column(Enum(*COPYRIGHT_ORACLES, name="copyright_oracles"),
Expand All @@ -395,13 +396,13 @@ class HistoryCopyright(Base):

__tablename__ = 'history_copyright'

id = Column(Integer, primary_key=True)
id = Column(BIGINT, primary_key=True)
timestamp = Column(DateTime(timezone=False),
index=True, nullable=False)
suite = Column(String, # suite == "ALL" means totals
index=True, nullable=False)
license = Column(String)
files = Column(Integer, nullable=True)
files = Column(BIGINT, nullable=True)

def __init__(self, suite, timestamp):
self.suite = suite
Expand Down

0 comments on commit fdf12a4

Please sign in to comment.