RumbleDB · TaoDFang · Dec 11, 2023 · Dec 11, 2023 · Dec 11, 2023 · Dec 13, 2023
diff --git a/exercise01/Exercise01_SQL_Brush_Up.ipynb b/exercise01/Exercise01_SQL_Brush_Up.ipynb
@@ -49,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -62,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -72,9 +72,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://postgres:***@postgres:5432/discogs\n",
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[('PostgreSQL 13.4 (Debian 13.4-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit',)]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "%%sql\n",
     "SELECT version();"
@@ -106,9 +135,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://postgres:***@postgres:5432/discogs\n",
+      "0 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>table_name</th>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "%%sql \n",
     "SELECT table_name\n",
@@ -126,9 +181,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://postgres:***@postgres:5432/discogs\n",
+      "0 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>table_name</th>\n",
+       "        <th>column_name</th>\n",
+       "        <th>data_type</th>\n",
+       "        <th>is_nullable</th>\n",
+       "        <th>ordinal_position</th>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "%%sql \n",
     "SELECT table_name, column_name, data_type, is_nullable, ordinal_position\n",
@@ -417,7 +502,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -431,7 +516,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.9.6"
   }
  },
  "nbformat": 4,

diff --git a/pics/ACID.png b/pics/ACID.png
diff --git a/pics/lecture_review.png b/pics/lecture_review.png
diff --git a/pics/query_examle.png b/pics/query_examle.png
diff --git a/pics/query_plan.png b/pics/query_plan.png
diff --git a/pics/rest_scaleup.png b/pics/rest_scaleup.png
diff --git a/progress.md b/progress.md
@@ -0,0 +1,105 @@
+# 2023_12_11 introduction 
+
+initialise my own exercise repository to correct commit -m
+
+the exercise uses docker to build the environment, but I should do every step by myself,
+or learn docker myself as mentioned in the introduction and then check the meaning of each dockerfile
+"Self-study: Docker for your laptop, Azure for large-scale clusters"
+progress.md (END)
+
+![overview](pics/lecture_review.png)
+
+## exercise 
+ environment set up
+
+## docker 
+https://docs.google.com/document/d/17HJ9-ljkLxP_HdkZKHjpj4BH67jBDIzxaU0EKciUkS8/edit?pli=1#heading=h.cilq0rusrnzq
+basic usage for this repository: check the readme file
+
+## exercise 
+the problem with course 2021 is that some material seems not to be available anymore, try course 2023 instead
+ environment set up
+
+Basic Usage
+Check out this repository using git. In the folder of a particular exercise (that contains a docker-compose.yml file), run the following command in a terminal:
+docker-compose up
+
+In most if not all weeks, one of the services is a Jupyter notebook server with which you can use the exercise notebooks interactively. You can access the server by accessing http://localhost:8888 in your favorite browser.
+
+# 2023_12_13 lessons learnt:  SQL 
+this is basic database knowledge (SQL)
+read provided textbook 
+
+Data independence:
+    logical data model: table (mainly), doesn't change; others like trees, graphs, cubes
+    physical storage: various
+directly using python and R is not the best option as it doesn't follow data independence
+
+Overall architecture:
+language:     SQL
+model  :      table 
+compute:      cpu 
+Storage:      disk 
+
+relational table: 
+Attribute;Column;Field;Property
+Primary key;Row ID;Name
+Row;Business Object;Item;Entity;Document;Record
+
+
+table as a (mathematical) relation: a subset of the Cartesian product
+of the domains
+A relation R is:
+1.  a set of attributes
+2.  an extension (set of tuples)
+tuple: more intuitive disvlae 
+S: Attributes
+V: values
+
+rules of relational table:
+1: relational integrity: all records have the same attributes
+2: atomic integrity (1st normal form): no sub-tables
+3: domain integrity: each column has the same datatype/domain/schema
+
+relational algebra: 
+Set queries: union, intersection, subtraction ...
+Filter queries: selection, projection (selecting attributes) ...
+renaming  queries: 
+joining queries: cartesian product, join (merge by matches)
+grouping, sorting  ???
+
+Normal forms: 
+it means "best practice "
+to ensure consistency, i.e. to avoid:
+ * update anomaly 
+ * delete anomaly
+ * insert anomaly
+1st normal form (tabular) - the key (atomic integrity): 
+2nd normal form (not joined) - the whole key: no partial dependency, the primary key fully determines the other attributes
+3rd normal form - nothing but the key : attributes only depend on the primary key 
+__but in the big data, we normally drop normal forms__
+
+SQL brush up 
+declarative language: tell what you want, not how
+pronunciation of "SQL": See-kwel or just S-Q-L
+
+query plan
+![query plan](pics/query_plan.png)
+![one example](pics/query_examle.png)
+
+transaction : ACID
+Atomicity :???
+consistency 
+Isolation
+Durability 
+![acid](pics/ACID.png)
+__again in the big data, we normally drop normal forms__
+
+performance: 
+indices 
+OLTP : online transaction processing, write-intensive
+OLAP : online analytical processing, read-intensive
+Mind data shape 
+
+Data scale up 
+![rest of the lecture](pics/rest_scaleup.png)