diff --git a/.nojekyll b/.nojekyll index f111065..1a70c8e 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -1e284f29 \ No newline at end of file +be2f75d2 \ No newline at end of file diff --git a/00_introduction.html b/00_introduction.html index 04438dc..06c0a19 100644 --- a/00_introduction.html +++ b/00_introduction.html @@ -346,7 +346,7 @@

Learning modules

  1. Git, Pull Requests, and code reviews
  2. Python functions, classes, and modules
  3. -
  4. Types, abstraction, and refactoring
  5. +
  6. Object oriented design in Python
  7. Testing and auto-formatting
  8. Dependencies and GitHub actions
  9. Documentation
  10. diff --git a/03_types_protocols.html b/03_oop.html similarity index 60% rename from 03_types_protocols.html rename to 03_oop.html index a4374ec..3d34699 100644 --- a/03_types_protocols.html +++ b/03_oop.html @@ -10,7 +10,7 @@ - Python package development - Python types, interfaces and protocols + Python package development - Object oriented design in Python @@ -392,28 +392,212 @@
    -

    Python types, interfaces and protocols

    +

    Object oriented design in Python

    +
    +

    Object oriented design

    +

    Benefits of object oriented design:

    +
      +
    • Encapsulation
    • +
    • Code reuse (composition, inheritance)
    • +
    • Abstraction
    • +
    +
    +
    +

    Encapsulation

    +
    class Location:
    +    def __init__(self, name, longitude, latitude):
    +        self.name = name.upper() # Names are always uppercase
    +        self.longitude = longitude
    +        self.latitude = latitude
    +
    +>>> loc = Location("Antwerp", 4.42, 51.22)
    +>>> loc.name
    +'ANTWERP'
    +>>> loc.name = "Antwerpen"
    +>>> loc.name
    +"Antwerpen" 😟
    +
    +
    +

    Encapsulation - Attributes

    +

    Prefixing a variable with an underscore (self._name) is a convention to indicate that the instance variable is private.

    +
    class Location:
    +    def __init__(self, name, longitude, latitude):
    +        self._name = name.upper() # Names are always uppercase
    +        ...
    +
    +    @property
    +    def name(self):
    +        return self._name
    +
    +    @name.setter
    +    def name(self, value):
    +        self._name = value.upper()
    +
    +>>> loc = Location("Antwerp", 4.42, 51.22)
    +>>> loc.name = "Antwerpen"
    +>>> loc.name
    +"ANTWERPEN" 😊
    +
    +
    +

    Composition

    +
    +
    +

    Composition in object oriented design is a way to combine objects or data types into more complex objects.

    +
    +
    +
    +
    +
    +
    classDiagram
    +
    +    class Grid{
    +        + nx
    +        + dx
    +        + ny
    +        + dy
    +        + find_index()
    +    }
    +
    +    class ItemInfo{
    +        + name
    +        + type
    +        + unit
    +    }
    +
    +    class DataArray{
    +        + data
    +        + time
    +        + item
    +        + geometry
    +        + plot()
    +    }
    +
    +    DataArray --* Grid
    +    DataArray --* ItemInfo
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +

    Composition - Example

    +
    class Grid:
    +    def __init__(self, nx, dx, ny, dy):
    +        self.nx = nx
    +        self.dx = dx
    +        self.ny = ny
    +        self.dy = dy
    +    
    +    def find_index(self, x,y):
    +        ...
    +
    +class DataArray:
    +    def __init__(self, data, time, item, geometry):
    +        self.data = data
    +        self.time = time
    +        self.item = item
    +        self.geometry = geometry
    +
    +    def plot(self):
    +        ...
    +
    +
    +

    Inheritance

    +
    +
    +

    Inheritance - Example

    +
    +
    +
    +
    + +
    classDiagram
    +
    +class _GeometryFM{
    ++ node_coordinates
    ++ element_table
    +}
    +
    +class GeometryFM2D{
    ++ interp2d()
    ++ get_element_area()
    ++ plot()
    +}
    +
    +class _GeometryFMLayered{
    +- _n_layers
    +- _n_sigma
    ++ to_2d_geometry()
    +}
    +
    +class GeometryFM3D{
    ++ plot()
    +}
    +
    +class GeometryFMVerticalProfile{
    ++ plot()
    +}
    +  _GeometryFM <|-- GeometryFM2D
    +  _GeometryFM <|-- _GeometryFMLayered
    +  _GeometryFMLayered <|-- GeometryFM3D
    +  _GeometryFMLayered <|-- GeometryFMVerticalProfile
    +
    +
    +
    +
    +
    +
    +
    +

    Inheritance - Example (2)

    +
    class _GeometryFMLayered(_GeometryFM):
    +    def __init__(self, nodes, elements, n_layers, n_sigma):
    +        # call the parent class init method
    +        super().__init__(
    +            nodes=nodes,
    +            elements=elements,
    +        )
    +        self._n_layers = n_layers
    +        self._n_sigma = n_sigma
    +
    +
    +

    Composition vs inheritance

    +
    +
      +
    • Inheritance is often used to reuse code, but this is not the main purpose of inheritance.
    • +
    • Inheritance is used to specialize behavior.
    • +
    • In most cases, composition is a better choice than inheritance.
    • +
    • Some recent programming languages (e.g. Go & Rust) do not support this style of inheritance.
    • +
    • Use inheritance only when it makes sense.
    • +
    +
    + +

    Types

    C#

    -
    int n = 2;
    -String s = "Hello";
    -
    -public String RepeatedString(String s, int n) {
    -    return Enumerable.Repeat(s, n).Aggregate((a, b) => a + b);
    -}
    +
    int n = 2;
    +String s = "Hello";
    +
    +public String RepeatedString(String s, int n) {
    +    return Enumerable.Repeat(s, n).Aggregate((a, b) => a + b);
    +}

    Python

    -
    n = 2
    -s = "Hello"
    -
    -def repeated_string(s, n):
    -    return s * n
    +
    n = 2
    +s = "Hello"
    +
    +def repeated_string(s, n):
    +    return s * n
    @@ -427,11 +611,11 @@

    Types

    Python with type hints

    -
    n: int = 2
    -s: str = "Hello"
    -
    -def repeated_string(s:str, n:int) -> str:
    -    return s * n
    +
    n: int = 2
    +s: str = "Hello"
    +
    +def repeated_string(s:str, n:int) -> str:
    +    return s * n
    @@ -439,12 +623,12 @@

    Abstraction

    Version A

    -
    total = 0.0
    -for x in values:
    -    total = total +x
    +
    total = 0.0
    +for x in values:
    +    total = total +x

    Version B

    -
    total = sum(values)
    +
    total = sum(values)
    @@ -513,28 +697,28 @@

    Collections Abstract Base Classes

    Collections Abstract Base Classes

    -
    >>> a = [1, 2, 3]
    ->>> 1 in a
    -True
    ->>> a.__contains__(1)
    -True
    ->>> len(a)
    -3
    ->>> a.__len__()
    -3
    ->>> for x in a:
    -...     v.append(x)
    ->>> it = a.__iter__()
    ->>> next(it)
    -1
    ->>> next(it)
    -2
    ->>> next(it)
    -3
    ->>> next(it)
    -Traceback (most recent call last):
    -  File "<stdin>", line 1, in <module>
    -StopIteration
    +
    >>> a = [1, 2, 3]
    +>>> 1 in a
    +True
    +>>> a.__contains__(1)
    +True
    +>>> len(a)
    +3
    +>>> a.__len__()
    +3
    +>>> for x in a:
    +...     v.append(x)
    +>>> it = a.__iter__()
    +>>> next(it)
    +1
    +>>> next(it)
    +2
    +>>> next(it)
    +3
    +>>> next(it)
    +Traceback (most recent call last):
    +  File "<stdin>", line 1, in <module>
    +StopIteration

    Collections Abstract Base Classes

    @@ -576,37 +760,37 @@

    Pythonic

    -
    class JavaLikeToolbox:
    -    
    -    def getToolByName(self, name: str) -> Tool:
    -        for tool in self.tools:
    -            if tool.name == name:
    -                return tool
    -
    -    def numberOfTools(self) -> int:
    -        return len(self.tools)
    -
    ->>> tb = JavaLikeToolbox([Hammer(), Screwdriver()])
    ->>> tb.getToolByName("hammer")
    -Hammer()
    ->>> tb.numberOfTools()
    -2
    +
    class JavaLikeToolbox:
    +    
    +    def getToolByName(self, name: str) -> Tool:
    +        for tool in self.tools:
    +            if tool.name == name:
    +                return tool
    +
    +    def numberOfTools(self) -> int:
    +        return len(self.tools)
    +
    +>>> tb = JavaLikeToolbox([Hammer(), Screwdriver()])
    +>>> tb.getToolByName("hammer")
    +Hammer()
    +>>> tb.numberOfTools()
    +2
    -
    class Toolbox:
    -
    -    def __getitem__(self, name: str) -> Tool:
    -        return self._tools[name]
    -    
    -    def __len__(self) -> int:
    -        return len(self.tools)
    -
    ->>> tb = Toolbox([Hammer(), Screwdriver()])
    ->>> tb["hammer"]
    -Hammer()
    ->>> len(tb)
    -2
    +
    class Toolbox:
    +
    +    def __getitem__(self, name: str) -> Tool:
    +        return self._tools[name]
    +    
    +    def __len__(self) -> int:
    +        return len(self.tools)
    +
    +>>> tb = Toolbox([Hammer(), Screwdriver()])
    +>>> tb["hammer"]
    +Hammer()
    +>>> len(tb)
    +2
    +
    + +
    class SparseMatrix:
    +    def __init__(self, shape, fill_value=0.0, data=None):
    +        self.shape = shape
    +        self._data = data if data is not None else {}
    +        self.fill_value = fill_value
    +        
    +    def __setitem__(self, key, value):
    +        i,j = key
    +        self._data[i,j] = float(value) 
    +
    +    def __getitem__(self, key) -> float:
    +        i,j = key
    +        return self._data.get((i,j), self.fill_value)
    +    
    +    def transpose(self) -> "SparseMatrix":
    +        data = {(j,i) : v for (i,j),v in self._data.items()}
    +        return SparseMatrix(data=data,
    +                            shape=self.shape,
    +                            fill_value=self.fill_value)
    +    
    +    def __repr__(self):
    +        matrix_str = ""
    +        for j in range(self.shape[1]):
    +            for i in range(self.shape[0]):
    +                value = self[i, j]
    +                matrix_str += f"{value:<4}"
    +            matrix_str += "\n"
    +        return matrix_str
    +
    +
    + +
    >>> m = SparseMatrix(shape=(2,2), fill_value=0.0)
    +>>> m
    +0.0 0.0
    +0.0 0.0
    +>>> m[0,1]
    +0.0
    +>>> m[0,1] = 1.0
    +>>> m[1,0] = 2.0
    +>>> m
    +0.0 2.0 
    +1.0 0.0 
    +>>> m.transpose()
    +0.0 1.0 
    +2.0 0.0
    +

    Duck typing

    @@ -628,11 +860,12 @@

    Duck typing

  11. If it walks like a duck and quacks like a duck, it’s a duck
  12. From the perspective of the caller, it doesn’t matter if it is a rubber duck or a real duck.
  13. The type of the object is not important, as long as it has the right methods.
  14. +
  15. Python is different than C# or Java, where you would have to create an interface IToolbox and implement it for Toolbox.
  16. -
    - +
    +

    Duck typing - Example

    An example is a scikit-learn transformer

    • fit
    • @@ -641,172 +874,82 @@

      Duck typing

    If you want to make a transformer compatible with sklearn, you have to implement these methods.

    -
    - -
    class MyTransformer:
    -    def fit(self, X, y=None):
    -        # do something
    -        return self
    -
    -    def transform(self, X):
    -        # do something
    -        return X
    -
    -    def fit_transform(self, X, y=None):
    -        return self.fit(X, y).transform(X)
    +
    +

    Duck typing - Example

    +
    class PositiveNumberTransformer:
    +
    +    def fit(self, X, y=None):
    +        # no need to fit (still need to have the method!)
    +        return self
    +
    +    def transform(self, X):
    +        return np.abs(X)
    +
    +    def fit_transform(self, X, y=None):
    +        return self.fit(X, y).transform(X)
    -
    - -
    from sklearn.base import TransformerMixin
    -
    -class MyOtherTransformer(TransformerMixin):
    -    def fit(self, X, y=None):
    -        # do something
    -        return self
    -
    -    def transform(self, X):
    -        # do something
    -        return X
    -
    -    # def fit_transform(self, X, y=None):
    -    # we get this for free, from TransformerMixin
    +
    +

    Duck typing - Mixins

    +

    We can inherit some behavior from sklearn.base.TransformerMixin

    +
    from sklearn.base import TransformerMixin
    +
    +class RemoveOutliersTransformer(TransformerMixin):
    +
    +    def __init__(self, lower_bound, upper_bound):
    +        self.lower_bound = lower_bound
    +        self.upper_bound = upper_bound
    +        self.lower_ = None
    +        self.upper_ = None
    +
    +    def fit(self, X, y=None):
    +        self.lower_ = np.quantile(X, self.lower_bound)
    +        self.upper_ = np.quantile(X, self.upper_bound)
    +
    +    def transform(self, X):
    +        return np.clip(X, self.lower_, self.upper_)
    +
    +    # def fit_transform(self, X, y=None):
    +    # we get this for free, from TransformerMixin
    -

    Let’s revisit the (date) Interval1

    -
    class Interval:
    -    def __init__(self, start, end):
    -        self.start = start
    -        self.end = end
    -
    -    def __contains__(self, x):
    -        return self.start < x < self.end
    -
    ->>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))
    -
    ->>> date(2020,1,15) in dr
    -True
    ->>> date(1970,1,1) in dr
    -False
    -
    -
    -

    Some other interval

    -
    class Interval:
    -    def __init__(self, start, end):
    -        self.start = start
    -        self.end = end
    -
    -    def __contains__(self, x):
    -        return self.start < x < self.end
    -    
    ->>> interval = Interval(5, 10)
    -
    ->>> 8 in interval
    -True
    ->>> 12 in interval
    -False
    +

    Let’s revisit the (date) Interval

    +

    The Interval class represents an interval in time.

    +
    class Interval:
    +    def __init__(self, start, end):
    +        self.start = start
    +        self.end = end
    +
    +    def __contains__(self, x):
    +        return self.start < x < self.end
    +
    +>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))
    +
    +>>> date(2020,1,15) in dr
    +True
    +>>> date(1970,1,1) in dr
    +False
    -

    As long as the start, end and x are comparable, the Interval class is a generic class able to handle integers, floats, dates, datetimes, strings …

    +

    What if we want to make another type of interval, e.g. an interval of numbers \([1.0, 2.0]\)?

    -
    -

    Time for a discussion

    -

    Pre-work for this Module

    -
    -

    Read chapters

    -
      -
    1. Abstraction and encapsulation

    2. -
    3. Designing for high performance

    4. -
    -

    in Hillard, 2020

    -

    Do you need to understand every single line of code you use, or is it sometimes enough to treat functions as a black box?

    -

    Consider the implications of storing data in a list or in a dictionary.

    -
    -
    -
    -

    Time for a discussion

    -

    Discuss in learning teams (15 minutes):

    -

    Chapter 3: Abstraction and encapsulation

    -

    What are some pros/cons of treating functions as black boxes?

    +
    +

    A number interval

    +
    class Interval:
    +    def __init__(self, start, end):
    +        self.start = start
    +        self.end = end
    +
    +    def __contains__(self, x):
    +        return self.start < x < self.end
    +    
    +>>> interval = Interval(5, 10)
    +
    +>>> 8 in interval
    +True
    +>>> 12 in interval
    +False
    -

    Chapter 4: Designing for high performance

    -

    What is the performance impact of using a list instead of a dictionary?

    -
    -
    -

    After break out session:

    -

    One person from each team briefly presents their discussion outcomes

    -
    -
    -
    -

    Protocols

    -
    -
      -
    • Protocols were introduced in Python 3.8
    • -
    • Protocols are a way to define a set of methods that a class must implement
    • -
    • Protocols are not checked at runtime
    • -
    • Protocols can be used for static type checking (mypy)
    • -
    • Protocols are a hint to the developer
    • -
    -
    -
    -
    -

    Protocols

    -
    from dataclasses import dataclass
    -from typing import Protocol, List
    -import matplotlib.pyplot as plt
    -
    -
    -@dataclass
    -class Station:
    -    name: str
    -    longitude: float
    -    latitude: float
    -
    -
    -class StationRepository(Protocol):
    -    def __getitem__(self, key: str) -> Station:
    -        ...
    -
    -
    -def plot_stations(station_ids: List[int], repo: StationRepository) -> None:
    -    for id in station_ids:
    -        station = repo[id]
    -        plt.plot(station.longitude, station.latitude, "o")
    -        plt.text(station.longitude, station.latitude, station.name)
    -
    -
    -

    Protocols

    -
    class DictStationRepository:  # useful for testing (Module 4)
    -    def __getitem__(self, key: int) -> Station:
    -        stations = {
    -            42: Station("Antwerp", 4.42, 51.22),
    -            365: Station("Brussels", 4.35, 50.85),
    -            182: Station("Ghent", 3.72, 51.05),
    -        }
    -        return stations[key]
    -
    -
    -class FileStationRepository:
    -    def __getitem__(self, key: int) -> Station:
    -        df = pd.read_csv("stations.csv", index_col="id")
    -        row = df.loc[key]
    -        return Station(row["name"], key, row["longitude"], row["latitude"])
    -
    -
    -class DatabaseStationRepository:
    -    def __getitem__(self, key: int) -> Station:
    -        # read from database
    -        station = read_from_db(key)
    -        return station
    -
    -
    -

    Protocols

    -
    repo = DictStationRepository()
    -# repo = FileStationRepository()
    -
    -plot_stations([42, 365], repo=repo)
    -
    -

    Protocols allows to work with abstract types, without having to know the implementation details.

    -

    This is useful for testing, but allows for more flexible code.

    +

    As long as the start, end and x are comparable, the Interval class is a generic class able to handle integers, floats, dates, datetimes, strings …

    @@ -817,12 +960,12 @@

    Postel’s law

  17. Be conservative in what you send
-
def process(number: Union[int,str,float]) -> int:
-    # make sure number is an int from now on
-    number = int(number)
-
-    result = number * 2
-    return result   
+
def process(number: Union[int,str,float]) -> int:
+    # make sure number is an int from now on
+    number = int(number)
+
+    result = number * 2
+    return result   
@@ -859,56 +1002,56 @@

Common refactoring techniques:

Rename variable

Before

-
n = 0
-for v in y:
-    if v < 0:
-        n = n + 1
+
n = 0
+for v in y:
+    if v < 0:
+        n = n + 1

After

-
FREEZING_POINT = 0.0
-n_freezing_days = 0
-for temp in daily_max_temperatures:
-    if temp < FREEZING_POINT:
-        n_freezing_days = n_freezing_days + 1 
+
FREEZING_POINT = 0.0
+n_freezing_days = 0
+for temp in daily_max_temperatures:
+    if temp < FREEZING_POINT:
+        n_freezing_days = n_freezing_days + 1 

Extract variable

Before

-
def predict(x):
-    return min(0.0, 0.5 + 2.0 * min(0,x) + (random.random() - 0.5) / 10.0)
+
def predict(x):
+    return min(0.0, 0.5 + 2.0 * min(0,x) + (random.random() - 0.5) / 10.0)

After

-
def predict(x):
-    scale = 10.0
-    error = (random.random() - 0.5) / scale)
-    a = 0.5
-    b = 2.0 
-    draft = a + b * x + error
-    return  min(0.0, draft)
+
def predict(x):
+    scale = 10.0
+    error = (random.random() - 0.5) / scale)
+    a = 0.5
+    b = 2.0 
+    draft = a + b * x + error
+    return  min(0.0, draft)

Extract method

-
def error(scale):
-    return (random.random() - 0.5) / scale)
-
-def linear_model(x, *, a=0.0, b=1.0):
-    return a + b * x
-
-def clip(x, *, min_value=0.0):
-    return min(min_value, x)
-
-def predict(x): 
-    draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)
-    return clip(draft, min_value=0.)
+
def error(scale):
+    return (random.random() - 0.5) / scale)
+
+def linear_model(x, *, a=0.0, b=1.0):
+    return a + b * x
+
+def clip(x, *, min_value=0.0):
+    return min(min_value, x)
+
+def predict(x): 
+    draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)
+    return clip(draft, min_value=0.)

Inline method

Opposite of extract method.

-
def predict(x): 
-    draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)
-    return min(0.0, x)
+
def predict(x): 
+    draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)
+    return min(0.0, x)

Composed method

@@ -916,51 +1059,51 @@

Composed method

-
# get data
-os.shutil.copyfile(thisfile, localfile)
-df = read_csv(localfile)
-
-# clean data
-df.dropna()
-df.drop_duplicates()
-df[somevar<0.0] = 0.0
-
-# transform data
-df.date = pd.to_datetime(df.date) - 86400
-
-# predict
-predictions = df.height + df.weight * df.age
+
# get data
+os.shutil.copyfile(thisfile, localfile)
+df = read_csv(localfile)
+
+# clean data
+df.dropna()
+df.drop_duplicates()
+df[somevar<0.0] = 0.0
+
+# transform data
+df.date = pd.to_datetime(df.date) - 86400
+
+# predict
+predictions = df.height + df.weight * df.age
-

-def get_data(filename,...):
-    ...
-
-def clean_data(df):
-    ...
-
-def transform_data(df):
-    ...
-
-def predict(df):
-    ...
-
-def main():
-    df = get_data("raw_data.csv")
-    clean_data = clean_data(df)
-    final_data = transform_data(clean_data)
-    predictions = predict(final_data)
+

+def get_data(filename,...):
+    ...
+
+def clean_data(df):
+    ...
+
+def transform_data(df):
+    ...
+
+def predict(df):
+    ...
+
+def main():
+    df = get_data("raw_data.csv")
+    clean_data = clean_data(df)
+    final_data = transform_data(clean_data)
+    predictions = predict(final_data)
-

Composed method1

+

Composed method

-
+
@@ -970,30 +1113,12 @@

Composed method1

If you want to learn more about refactoring, I recommend the book “Refactoring: Improving the Design of Existing Code” by Martin Fowler.

-
-
-

Homework

-
    -
  1. Create a new branch on your project with a new feature

  2. -
  3. Create a class which is relevant to the domain of your project, please note that this is simply a starting point, not the final product.

  4. -
  5. Create at least one public method

  6. -
  7. Create at least one public property

  8. -
  9. Make use of one or more private (helper) methods in the public method and/or property

  10. -
  11. Commit code incrementally as you create it, the first commit should not be complete

  12. -
  13. Create a pull request with a short description of this “feature”

  14. -
  15. Ask for reviews from one of your peers as well as the instructor assigned to your project

  16. -
-
-

And don’t forget to do the Quiz!

-
- - diff --git a/03_types_protocols.qmd b/03_oop.qmd similarity index 60% rename from 03_types_protocols.qmd rename to 03_oop.qmd index 72f6a21..fd9ec65 100644 --- a/03_types_protocols.qmd +++ b/03_oop.qmd @@ -1,5 +1,5 @@ --- -title: "Python types, interfaces and protocols" +title: "Object oriented design in Python" format: revealjs: slide-number: true @@ -7,6 +7,194 @@ footer: Python package development logo: academy_logo.png --- +## Object oriented design + +Benefits of object oriented design: + +* Encapsulation +* Code reuse (composition, inheritance) +* Abstraction + + +## Encapsulation + +```{.python code-line-numbers="1-5|7-9|10-12"} +class Location: + def __init__(self, name, longitude, latitude): + self.name = name.upper() # Names are always uppercase + self.longitude = longitude + self.latitude = latitude + +>>> loc = Location("Antwerp", 4.42, 51.22) +>>> loc.name +'ANTWERP' +>>> loc.name = "Antwerpen" +>>> loc.name +"Antwerpen" 😟 +``` + +## Encapsulation - Attributes + +Variables prefixed with an underscore (`self._name`) is a convention to indicate that the instance variable is private. + +```{.python code-line-numbers="|3,7-9|10-11"} +class Location: + def __init__(self, name, longitude, latitude): + self._name = name.upper() # Names are always uppercase + ... + + @property + def name(self): + return self._name + + @name.setter + def name(self, value): + self._name = value.upper() + +>>> loc = Location("Antwerp", 4.42, 51.22) +>>> loc.name = "Antwerpen" +>>> loc.name +"ANTWERPEN" 😊 +``` + +## Composition {.smaller} + +::: {.columns} + +::: {.column} + + +Composition in object oriented design is a way to combine objects or data types into more complex objects. 
+ +::: + +::: {.column} + +```{mermaid} +classDiagram + + class Grid{ + + nx + + dx + + ny + + dy + + find_index() + } + + class ItemInfo{ + + name + + type + + unit + } + + class DataArray{ + + data + + time + + item + + geometry + + plot() + } + + DataArray --* Grid + DataArray --* ItemInfo +``` + +::: + +:::: + +## Composition - Example {.smaller} + +```python +class Grid: + def __init__(self, nx, dx, ny, dy): + self.nx = nx + self.dx = dx + self.ny = ny + self.dy = dy + + def find_index(self, x,y): + ... + +class DataArray: + def __init__(self, data, time, item, geometry): + self.data = data + self.time = time + self.item = item + self.geometry = geometry + + def plot(self): + ... +``` + +## Inheritance + +## Inheritance - Example + +```{mermaid} +classDiagram + +class _GeometryFM{ ++ node_coordinates ++ element_table +} + +class GeometryFM2D{ ++ interp2d() ++ get_element_area() ++ plot() +} + +class _GeometryFMLayered{ +- _n_layers +- _n_sigma ++ to_2d_geometry() +} + +class GeometryFM3D{ ++ plot() +} + +class GeometryFMVerticalProfile{ ++ plot() +} + _GeometryFM <|-- GeometryFM2D + _GeometryFM <|-- _GeometryFMLayered + _GeometryFMLayered <|-- GeometryFM3D + _GeometryFMLayered <|-- GeometryFMVerticalProfile +``` + +## Inheritance - Example (2) + +```python +class _GeometryFMLayered(_GeometryFM): + def __init__(self, nodes, elements, n_layers, n_sigma): + # call the parent class init method + super().__init__( + nodes=nodes, + elements=elements, + ) + self._n_layers = n_layers + self._n_sigma = n_sigma +``` + +## Composition vs inheritance + +::: {.incremental} + +* Inheritance is often used to reuse code, but this is not the main purpose of inheritance. +* Inheritance is used to specialize behavior. +* In most cases, composition is a better choice than inheritance. +* Some recent programming languages (e.g. Go & Rust) do not support this style of inheritance. +* Use inheritance only when it makes sense. + +::: + +::: aside +Hillard, 2020, Ch. 
8 "The rules (and exceptions) of inheritance" +::: + + ## Types **C#** @@ -228,6 +416,60 @@ Hammer() You want your code to be feel like the built-in types. ::: +--- + +```{.python code-line-numbers="|7-13"} +class SparseMatrix: + def __init__(self, shape, fill_value=0.0, data=None): + self.shape = shape + self._data = data if data is not None else {} + self.fill_value = fill_value + + def __setitem__(self, key, value): + i,j = key + self._data[i,j] = float(value) + + def __getitem__(self, key) -> float: + i,j = key + return self._data.get((i,j), self.fill_value) + + def transpose(self) -> "SparseMatrix": + data = {(j,i) : v for (i,j),v in self._data.items()} + return SparseMatrix(data=data, + shape=self.shape, + fill_value=self.fill_value) + + def __repr__(self): + matrix_str = "" + for j in range(self.shape[1]): + for i in range(self.shape[0]): + value = self[i, j] + matrix_str += f"{value:<4}" + matrix_str += "\n" + return matrix_str +``` + +--- + +```python +>>> m = SparseMatrix(shape=(2,2), fill_value=0.0) +>>> m +0.0 0.0 +0.0 0.0 +>>> m[0,1] +0.0 +>>> m[0,1] = 1.0 +>>> m[1,0] = 2.0 +>>> m +0.0 2.0 +1.0 0.0 +>>> m.transpose() +0.0 1.0 +2.0 0.0 +``` + + + ## Duck typing @@ -236,11 +478,13 @@ You want your code to be feel like the built-in types. * "*If it walks like a duck and quacks like a duck, it's a duck*" * From the perspective of the caller, it doesn't matter if it is a rubber duck or a real duck. * The type of the object is **not important**, as long as it has the right methods. +* Python is different than C# or Java, where you would have to create an interface `IToolbox` and implement it for `Toolbox`. ::: ---- +## Duck typing - Example + An example is a Scikit learn transformers @@ -250,43 +494,53 @@ An example is a Scikit learn transformers If you want to make a transformer compatible with sklearn, you have to implement these methods. 
---- +## Duck typing - Example ```python -class MyTransformer: +class PositiveNumberTransformer: + def fit(self, X, y=None): - # do something + # no need to fit (still need to have the method!) return self def transform(self, X): - # do something - return X + return np.abs(X) def fit_transform(self, X, y=None): return self.fit(X, y).transform(X) ``` ---- +## Duck typing - Mixins {.smaller} -```python +We can inherit some behavior from `sklearn.base.TransformerMixin` + +```{.python code-line-numbers="|1,3,18,19"} from sklearn.base import TransformerMixin -class MyOtherTransformer(TransformerMixin): +class RemoveOutliersTransformer(TransformerMixin): + + def __init__(self, lower_bound, upper_bound): + self.lower_bound = lower_bound + self.upper_bound = upper_bound + self.lower_ = None + self.upper_ = None + def fit(self, X, y=None): - # do something - return self + self.lower_ = np.quantile(X, self.lower_bound) + self.upper_ = np.quantile(X, self.upper_bound) def transform(self, X): - # do something - return X + return np.clip(X, self.lower_, self.upper_) # def fit_transform(self, X, y=None): # we get this for free, from TransformerMixin ``` -## Let's revisit the (date) Interval^[https://martinfowler.com/eaaDev/Range.html] +## Let's revisit the (date) Interval + +The `Interval` class represent an interval in time. -```{.python code-line-numbers="6-7|11-14"} +```{.python code-line-numbers="|6-7|11-14"} class Interval: def __init__(self, start, end): self.start = start @@ -303,7 +557,12 @@ True False ``` -## Some other interval +. . . + +What if we want to make another type of interval, e.g. a interval of numbers $[1.0, 2.0]$? + + +## A number interval ```{.python code-line-numbers="9-14"} class Interval: @@ -326,126 +585,6 @@ False As long as the `start`, `end` and `x` are comparable, the `Interval` class is a generic class able to handle integers, floats, dates, datetimes, strings ... 
-## Time for a discussion {.smaller} - -Pre-work for this Module - -> Read chapters -> -> 3. Abstraction and encapsulation -> -> 4. Designing for high performance -> -> in Hillard, 2020 -> -> Do you need to understand every single line of code you use, or is it sometimes enough to treat functions as a black box? -> -> Consider the implications of storing data in a list or in a dictionary. - -## Time for a discussion - -*Discuss in learning teams (15 minutes):* - -**Chapter 3: Abstraction and encapsulation** - -What are some pros/cons of treating functions as black boxes? - -. . . - -**Chapter 4: Designing for high performance** - -What is the performance impact of using a list instead of a dictionary? - -. . . - -*After break out session:* - -One person from each team briefly presents their discussion outcomes - - - -## Protocols - -::: {.incremental} - -* Protocols were introduced in Python 3.8 -* Protocols are a way to define a set of methods that a class must implement -* Protocols are not checked at runtime -* Protocols can be used for static type checking (mypy) -* Protocols are a hint to the developer - -::: - -## Protocols {.smaller} - - -```{.python} -from dataclasses import dataclass -from typing import Protocol, List -import matplotlib.pyplot as plt - - -@dataclass -class Station: - name: str - longitude: float - latitude: float - - -class StationRepository(Protocol): - def __getitem__(self, key: str) -> Station: - ... 
- - -def plot_stations(station_ids: List[int], repo: StationRepository) -> None: - for id in station_ids: - station = repo[id] - plt.plot(station.longitude, station.latitude, "o") - plt.text(station.longitude, station.latitude, station.name) -``` - -## Protocols {.smaller} - -```{.python code-line-numbers="1-2|11-12|18-19"} -class DictStationRepository: # useful for testing (Module 4) - def __getitem__(self, key: int) -> Station: - stations = { - 42: Station("Antwerp", 4.42, 51.22), - 365: Station("Brussels", 4.35, 50.85), - 182: Station("Ghent", 3.72, 51.05), - } - return stations[key] - - -class FileStationRepository: - def __getitem__(self, key: int) -> Station: - df = pd.read_csv("stations.csv", index_col="id") - row = df.loc[key] - return Station(row["name"], key, row["longitude"], row["latitude"]) - - -class DatabaseStationRepository: - def __getitem__(self, key: int) -> Station: - # read from database - station = read_from_db(key) - return station -``` - -## Protocols - -```python -repo = DictStationRepository() -# repo = FileStationRepository() - -plot_stations([42, 365], repo=repo) -``` - -. . . - -Protocols allows to work with abstract types, without having to know the implementation details. - -This is useful for testing, but allows for more flexible code. - ## Postel's law a.k.a. the Robustness principle of software design @@ -621,7 +760,7 @@ def main(): predictions = predict(final_data) ``` -## Composed method^[https://wiki.c2.com/?ComposedMethod] +## Composed method * Divide your program into methods that perform one identifiable task * Keep all of the operations in a method at the same level of abstraction. @@ -648,24 +787,3 @@ If you want to learn more about refactoring, I recommend the book "Refactoring: :::: -# Homework {.smaller} - -1. Create a new branch on your project with a new feature - -2. Create a class which is relevant to the domain of your project, please note that this is simply a starting point, not the final product. - -3. 
Create at least one public method - -4. Create at least one public property - -5. Make use of one or more private (helper) methods in the public method and/or property - -6. Commit code incrementally as you create it, the first commit should not be complete - -7. Create a pull request with a short description of this "feature" - -8. Ask for reviews from one of your peers as well as the instructor assigned to your project - -. . . - - **And don't forget to do the Quiz!** \ No newline at end of file diff --git a/course_structure.html b/course_structure.html new file mode 100644 index 0000000..33fd76d --- /dev/null +++ b/course_structure.html @@ -0,0 +1,344 @@ + + + + + + + + + +Python package development – course_structure + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + +
+ + + +

+graph TD
+
+    M1(Git, Pull Requests, and code reviews)
+    M2(Python functions, classes, and modules)
+    M3(Object oriented design in Python)
+    M4(Testing and auto-formatting)
+    M5(Dependencies and GitHub actions)
+    M6(Documentation)
+    M7(Distributing your package)
+
+    B1(The bigger picture)
+    B2(Separation of concerns)
+    B3(Abstraction and encapsulation)
+    B4(Designing for high performance)
+    B5(Testing your software)
+    B6(Separation of concerns in practice)
+    B7(Extensibility and flexibility)
+    B8(The rules and exceptions of inheritance)
+
+    M1 --> M2 --> M3 --> M4 --> M5 --> M6 --> M7
+
+    B1 --> M2
+    B2 --> M2
+    B4 --> M4
+ + + +
+ +
+ + + + \ No newline at end of file diff --git a/index.html b/index.html index 03bc2da..7ce0d7e 100644 --- a/index.html +++ b/index.html @@ -96,7 +96,7 @@

Learning modules

  1. Git, Pull Requests, and code reviews
  2. Python functions, classes, and modules
  3. -
  4. Types, abstraction, and refactoring
  5. +
  6. Object oriented design in Python
  7. Testing and auto-formatting
  8. Dependencies and GitHub actions
  9. Documentation
  10. diff --git a/search.json b/search.json index 1f7a3f0..acaa308 100644 --- a/search.json +++ b/search.json @@ -1,4 +1,18 @@ [ + { + "objectID": "index.html", + "href": "index.html", + "title": "Python package development", + "section": "", + "text": "Introduction" + }, + { + "objectID": "index.html#learning-modules", + "href": "index.html#learning-modules", + "title": "Python package development", + "section": "Learning modules", + "text": "Learning modules\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nObject oriented design in Python\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package\n\n©️ DHI 2023" + }, { "objectID": "00_introduction.html#instructors", "href": "00_introduction.html#instructors", @@ -18,7 +32,7 @@ "href": "00_introduction.html#learning-modules", "title": "Python package development", "section": "Learning modules", - "text": "Learning modules\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nTypes, abstraction, and refactoring\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package" + "text": "Learning modules\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nObject oriented design in Python\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package" }, { "objectID": "00_introduction.html#learning-objectives", @@ -49,550 +63,564 @@ "text": "Poll\n\n\n\nPython package development" }, { - "objectID": "02b_naming_conventions.html#variables-function-and-method-names", - "href": "02b_naming_conventions.html#variables-function-and-method-names", - "title": "Python - Naming conventions", - "section": "Variables, function and method names", - "text": "Variables, function and method names\n\nUse lowercase characters\nSeparate words with underscores\n\n\nmodel_name = \"NorthSeaModel\"\nn_epochs = 100\n\ndef 
my_function():\n pass" + "objectID": "01_version_control.html#why-use-version-control", + "href": "01_version_control.html#why-use-version-control", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Why use version control?", + "text": "Why use version control?\n\n\n\n\n\nManage changes to code over time\nKeep track of changes and revert to previous versions if needed.\nCollaborate and merge changes from different people\nEnsure code stability\nBest practice for software development" }, { - "objectID": "02b_naming_conventions.html#constants", - "href": "02b_naming_conventions.html#constants", - "title": "Python - Naming conventions", - "section": "Constants", - "text": "Constants\n\nUse all uppercase characters\n\nGRAVITY = 9.81\n\nAVOGADRO_CONSTANT = 6.02214076e23\n\nSECONDS_IN_A_DAY = 86400\n\nN_LEGS_PER_ANIMAL = {\n \"human\": 2,\n \"dog\": 4,\n \"spider\": 8,\n}\n\nPython will not prevent you from changing the value of a constant, but it is a convention to use all uppercase characters for constants." + "objectID": "01_version_control.html#centralized-version-control", + "href": "01_version_control.html#centralized-version-control", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Centralized version control", + "text": "Centralized version control\n\nSingle source with the entire history\nLocal copy with latest version . . 
.\nExamples: SVN, Surround" }, { - "objectID": "02b_naming_conventions.html#classes", - "href": "02b_naming_conventions.html#classes", - "title": "Python - Naming conventions", - "section": "Classes", - "text": "Classes\n\nUse CamelCase for the name of the class\nUse lowercase characters for the name of the methods\nSeparate words with underscores\n\n\nclass RandomClassifier: # CamelCase ✅\n\n def fit(self, X, y):\n self.classes_ = np.unique(y)\n\n def predict(self, X):\n return np.random.choice(self.classes_, size=len(X))\n\n def fit_predict(self, X, y): # lowercase ✅\n self.fit(X, y)\n return self.predict(X)" + "objectID": "01_version_control.html#distributed-version-control", + "href": "01_version_control.html#distributed-version-control", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Distributed version control", + "text": "Distributed version control\n\nLocal copy has the entire history\nCommit changes to code offline\nAuthorative source (origin) . . .\nExamples: Git, Mercurial" }, { - "objectID": "02b_naming_conventions.html#function-return-values", - "href": "02b_naming_conventions.html#function-return-values", - "title": "Python - Naming conventions", - "section": "Function return values", - "text": "Function return values\ndef my_function() -> str:\n return \"42\"\n\ndef my_other_function() -> None: # it doesn't return anything 🤔\n print(\"42\")\n\nIn action:\n>>> my_function()\n'42'\n>>> my_other_function()\n42\n>>> x = my_function()\n>>> x\n'42'\n>>> y = my_other_function()\n>>> y" + "objectID": "01_version_control.html#git", + "href": "01_version_control.html#git", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Git", + "text": "Git\nGit is a powerful tool for managing code changes and collaborating with others on a project.\n\nYou can use Git from the command line, or with a graphical user interface (GUI).\n\n\n> git add foo.py\n\n\n> git commit -m \"Nailed it\"\n\n\n> git push" }, { - "objectID": 
"02b_naming_conventions.html#github-repo-naming-convention", - "href": "02b_naming_conventions.html#github-repo-naming-convention", - "title": "Python - Naming conventions", - "section": "Github repo naming convention", - "text": "Github repo naming convention\n\nThis is just a suggestion\nUse lowercase characters\nSeparate words with dashes\n\nExample: my-awesome-repo\n\n\n\nPython package development" + "objectID": "01_version_control.html#basic-git-commands", + "href": "01_version_control.html#basic-git-commands", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Basic Git commands", + "text": "Basic Git commands\n\n\ngit add: adds a file to the staging area\ngit commit: creates a new commit with the changes in the staging area\ngit status: shows the current status of your repository\ngit log: shows the commit history of your repository\ngit stash: temporarily save changes that are not ready to be committed" }, { - "objectID": "02_function_classes.html#functions-as-black-boxes", - "href": "02_function_classes.html#functions-as-black-boxes", - "title": "Functions, classes and modules", - "section": "Functions as black boxes", - "text": "Functions as black boxes\n\n\n\n\nflowchart LR\n A(Input A) --> F[\"Black box\"]\n B(Input B) --> F\n F --> O(Output)\n\n style F fill:#000,color:#fff,stroke:#333,stroke-width:4px\n\n\n\n\n\n\n\nA function is a black box that takes some input and produces some output.\nThe input and output can be anything, including other functions.\nAs long as the input and output are the same, the function body can be modified." 
+ "objectID": "01_version_control.html#working-with-remote-repositories", + "href": "01_version_control.html#working-with-remote-repositories", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Working with remote repositories", + "text": "Working with remote repositories\n\n\ngit clone: creates a copy of the codebase on your local machine.\ngit push: pushes changes back to the remote repository.\ngit pull: pulls changes from the remote repository." }, { - "objectID": "02_function_classes.html#pure-functions", - "href": "02_function_classes.html#pure-functions", - "title": "Functions, classes and modules", - "section": "Pure functions", - "text": "Pure functions\nA pure function returns the same output for the same input.\ndef f(x)\n return x**2\n\n>> f(2)\n4\n>> f(2)\n4" + "objectID": "01_version_control.html#branching-and-merging", + "href": "01_version_control.html#branching-and-merging", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Branching and Merging", + "text": "Branching and Merging\n\nA branch is a separate version of your code that you can work on independently from the main branch.\ngit merge: merges changes back into the main branch (we will do this from GitHub)" }, { - "objectID": "02_function_classes.html#side-effects", - "href": "02_function_classes.html#side-effects", - "title": "Functions, classes and modules", - "section": "Side effects", - "text": "Side effects\nA function can have side effects, like appending to a a file\ndef f_with_side_effect(x):\n with open(\"output.txt\", \"a\") as f:\n f.write(str(x))\n return x**2\n\nThe function has x as input, returns the square of x, but also appends x to a file. If you run the function a second time, the file will contain two lines." 
+ "objectID": "01_version_control.html#git-hosting-platforms", + "href": "01_version_control.html#git-hosting-platforms", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Git hosting platforms", + "text": "Git hosting platforms" }, { - "objectID": "02_function_classes.html#side-effects-1", - "href": "02_function_classes.html#side-effects-1", - "title": "Functions, classes and modules", - "section": "Side effects", - "text": "Side effects\nPure functions with no side effects are easier to reason about.\nBut sometimes side effects are necessary.\n\nWriting to a file\nWriting to a database\nPrinting to the screen\nCreating a plot" + "objectID": "01_version_control.html#github", + "href": "01_version_control.html#github", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "GitHub", + "text": "GitHub\n\n\nGit repository hosting service\nCollaborate with others on codebase\nFork a repository to work on your own version\nPull requests for code review and merging changes\nIssue tracking and project management tools\nGitHub Pages for hosting websites" }, { - "objectID": "02_function_classes.html#modifying-input-arguments", - "href": "02_function_classes.html#modifying-input-arguments", - "title": "Functions, classes and modules", - "section": "Modifying input arguments", - "text": "Modifying input arguments\ndef difficult_function(values):\n for i in range(len(values)):\n values[i] = min(0, values[i]) # 😟\n return values\n\n>>> x = [1,2,-1]\n>>> difficult_function(x)\n>>> x\n[0,0,-1]\n\nThis function modifies the input array, which might come as a surprise. The array is passed by reference, so the function can modify it." 
+ "objectID": "01_version_control.html#github-flow", + "href": "01_version_control.html#github-flow", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Github flow", + "text": "Github flow\n\n\n\nCreate a branch\nMake changes\nCreate a pull request\nReview\nMerge\n\n\n\n\nClone a repository to work on a copy (optionally: fork first)\nCreate a branch for each new feature or fix\nCommit changes and push to remote repository\nOpen a pull request to propose changes and request code review\nMerge changes back into the main branch" }, { - "objectID": "02_function_classes.html#positional-arguments", - "href": "02_function_classes.html#positional-arguments", - "title": "Functions, classes and modules", - "section": "Positional arguments", - "text": "Positional arguments\ndef f(x, y):\n return x + y\n\n>>> f(1, 2)\n3" + "objectID": "01_version_control.html#time-for-a-discussion", + "href": "01_version_control.html#time-for-a-discussion", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Time for a discussion", + "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\n\nIntroduce your project briefly\nThink about a project you’ve worked on in the past that involved collaborating with others on code. What challenges did you face, and how do you think Git and GitHub could have helped to address those challenges?\n\nAfter break out session:\n\nOne person from each team briefly presents their discussion outcomes\n\n\n\nWhat is the benefit of working in branches?\nWhat are some best practices for collaborating on code with others, and how can Git and GitHub help to support those best practices?" 
}, { - "objectID": "02_function_classes.html#keyword-arguments", - "href": "02_function_classes.html#keyword-arguments", - "title": "Functions, classes and modules", - "section": "Keyword arguments", - "text": "Keyword arguments\ndef f(x, y):\n return x + y\n\n>>> f(x=1, y=2)\n3" + "objectID": "01_version_control.html#desktop-application-github-desktop", + "href": "01_version_control.html#desktop-application-github-desktop", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Desktop Application: GitHub Desktop", + "text": "Desktop Application: GitHub Desktop" }, { - "objectID": "02_function_classes.html#positional-arguments-1", - "href": "02_function_classes.html#positional-arguments-1", - "title": "Functions, classes and modules", - "section": "Positional arguments", - "text": "Positional arguments\n\n\nVersion 1\ndef is_operable(height, period):\n\n return height < 2.0 and period < 6.0\n\n>>> is_operable(1.0, 3.0)\nTrue\n\nVersion 2\ndef is_operable(period, height=0.0):\n # dont forget, that arguments are swapped 👍\n return height < 2.0 and period < 6.0\n\n>>> is_operable(1.0, 3.0)\nFalse 😟\n\n\n\nThe order of the arguments is swapped, since we want to make height an optional argument (more on that later). This breaks existing code, since the order of the arguments is changed." 
+ "objectID": "01_version_control.html#demo", + "href": "01_version_control.html#demo", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Demo", + "text": "Demo" }, { - "objectID": "02_function_classes.html#keyword-only-arguments", - "href": "02_function_classes.html#keyword-only-arguments", - "title": "Functions, classes and modules", - "section": "Keyword only arguments", - "text": "Keyword only arguments\ndef f(*, x, y):\n return x + y\n\n>>> f(1,2)\nTraceback (most recent call last):\n File \"<stdin>\", line 1, in <module>\nTypeError: f() takes 0 positional arguments but 2 were given" + "objectID": "01_version_control.html#github-best-practices", + "href": "01_version_control.html#github-best-practices", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Github best practices", + "text": "Github best practices\n\n\nCommit often\nUse descriptive commit messages\nKeep pull requests small and focused\nUse “issues” to track work\nReview code regularly" }, { - "objectID": "02_function_classes.html#optional-default-arguments", - "href": "02_function_classes.html#optional-default-arguments", - "title": "Functions, classes and modules", - "section": "Optional (default) arguments", - "text": "Optional (default) arguments\ndef f(x, n=2):\n return x**n\n\n>>> f(2)\n4\n>>> f(2, n=3)\n8\n\nMakes it easy to use a function with many arguments." 
+ "objectID": "01_version_control.html#resources", + "href": "01_version_control.html#resources", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Resources", + "text": "Resources\n\nGitHub: quickstart\nRealPython: git and github intro\nDatacamp: introduction to Git" }, { - "objectID": "02_function_classes.html#mutable-default-arguments", - "href": "02_function_classes.html#mutable-default-arguments", - "title": "Functions, classes and modules", - "section": "Mutable default arguments", - "text": "Mutable default arguments\nPython’s default arguments are evaluated once when the function is defined, not each time the function is called.\n\ndef add_to_cart(x, cart=[]): # this line is evaluated only once 😮\n cart.append(x)\n return cart\n\n>>> add_to_cart(1, cart=[2])\n[2, 1]\n\n>>> add_to_cart(1)\n[1]\n>>> add_to_cart(2)\n[1, 2]\n\nPython’s default arguments are evaluated once when the function is defined, not each time the function is called (like it is in say, Ruby). This means that if you use a mutable default argument and mutate it, you will and have mutated that object for all future calls to the function as well." 
+ "objectID": "01_version_control.html#word-list", + "href": "01_version_control.html#word-list", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Word list", + "text": "Word list\n\nClone\n\nmaking a local copy of a remote repository on your computer.\n\nRemote\n\na reference to a Git repository that is hosted on a remote server, typically on a service like GitHub.\n\nCommit\n\na record of changes made to a repository, including the changes themselves and a message describing what was changed.\n\nStage\n\nselecting changes that you want to include in the next commit.\n\nPush\n\nsending changes from your local repository to a remote repository.\n\nPull\n\nretrieving changes from a remote repository and merging them into your local repository.\n\nBranch\n\na separate line of development that can be used to work on new features or bug fixes without affecting the main codebase.\n\nPull request\n\na way to propose changes to a repository by asking the repository owner to “pull” in the changes from a branch or fork.\n\nStash\n\ntemporarily save changes that are not ready to be committed (bring them back later when needed).\n\nMerge\n\nthe process of combining changes from one branch or fork into another, typically the main codebase.\n\nRebase\n\na way to integrate changes from one branch into another by applying the changes from the first branch to the second branch as if they were made there all along.\n\nMerge conflict\n\nwhen Git is unable to automatically merge changes from two different branches, because the changes overlap or conflict.\n\nCheckout\n\nswitching between different branches or commits in a repository.\n\nFork\n\na copy of a repository that you create on your own account, which you can modify without affecting the original repository." 
}, { - "objectID": "02_function_classes.html#how-to-use-default-mutable-arguments", - "href": "02_function_classes.html#how-to-use-default-mutable-arguments", - "title": "Functions, classes and modules", - "section": "How to use default (mutable) arguments", - "text": "How to use default (mutable) arguments\ndef add_to_cart_safe(x, cart=None):\n if cart is None:\n cart = [] # this line is evaluated each time the function is called\n cart.append(x)\n return cart" + "objectID": "01_version_control.html#summary", + "href": "01_version_control.html#summary", + "title": "Git, GitHub, Pull Requests, and code reviews", + "section": "Summary", + "text": "Summary\n\n\nVersion control is a tool for managing changes to code\nGit is a distributed version control system (software)\nGitHub is a platform for hosting and collaborating on Git repositories\nGitHub Desktop is a GUI for Git (and GitHub)\nPull requests are a way to propose changes to a repository\n\n\n\n\n\nPython package development" }, { - "objectID": "02_function_classes.html#changing-return-types", - "href": "02_function_classes.html#changing-return-types", - "title": "Functions, classes and modules", - "section": "Changing return types", - "text": "Changing return types\nSince Python is a dynamic language, the type of the returned variable is allowed to vary.\ndef foo(x):\n if x >=0:\n return x\n else:\n return \"x is negative\"\n\nBut it usually a bad idea, since you can not tell from reading the code, which type will be returned." + "objectID": "07_packaging.html#packaging", + "href": "07_packaging.html#packaging", + "title": "Distributing your Python package", + "section": "Packaging", + "text": "Packaging\nPackaging means creating a package that can be installed by pip.\nThere are many ways to create an installable package, and many ways to distribute it.\nWe will show how to create a package using hatchling, and how to distribute it on GitHub, PyPI and a private PyPI server." 
}, { - "objectID": "02_function_classes.html#changing-return-types-1", - "href": "02_function_classes.html#changing-return-types-1", - "title": "Functions, classes and modules", - "section": "Changing return types", - "text": "Changing return types\ndef is_operable(height, period):\n if height < 10:\n return height < 5.0 and period > 4.0\n else:\n return \"No way!\"\n\n>>> if is_operable(height=12.0, period=5.0):\n... print(\"Go ahead!\")\n...\nGo ahead!\n\n\n\n\n\n\n\nImportant\n\n\nIs this the result you expected?\n\n\n\n\n\nA non-empty string or a non-zero value is considered “truthy” in Python!" + "objectID": "07_packaging.html#benefits-of-packaging", + "href": "07_packaging.html#benefits-of-packaging", + "title": "Distributing your Python package", + "section": "Benefits of packaging", + "text": "Benefits of packaging\n\n\nDistribute your package to others\nInstall your package with pip\nSpecify dependencies\nReproducibility\nSpecify version\nRelease vs. development versions" }, { - "objectID": "02_function_classes.html#type-hints", - "href": "02_function_classes.html#type-hints", - "title": "Functions, classes and modules", - "section": "Type hints", - "text": "Type hints\nPython is a dynamically typed language -> the type of a variable is determined at runtime.\n\nBut we can add type hints to help the reader (and the code editor).\ndef is_operable(height: float, period: float) -> bool:\n ..." + "objectID": "07_packaging.html#packaging-workflow", + "href": "07_packaging.html#packaging-workflow", + "title": "Distributing your Python package", + "section": "Packaging workflow", + "text": "Packaging workflow\n\nCreate a pyproject.toml in the root folder of the project\nBuild a package (e.g. 
myproject-0.1.0-py3-none-any.whl)\nUpload the package to location, where others can find it" }, { - "objectID": "02_function_classes.html#time-for-a-discussion", - "href": "02_function_classes.html#time-for-a-discussion", - "title": "Functions, classes and modules", - "section": "Time for a discussion", - "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\n\nIntroduce yourself briefly\nDiscuss your experience with Object Oriented Programming, why are classes useful?\nMention some problems with poorly designed code\n\nYour own experience\nFrom the book\n\n\nAfter break out session:\n\nSelected person from each team briefly presents their discussion" + "objectID": "07_packaging.html#pyproject.toml", + "href": "07_packaging.html#pyproject.toml", + "title": "Distributing your Python package", + "section": "pyproject.toml", + "text": "pyproject.toml\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[project]\nname = \"my_library\"\nversion = \"0.0.1\"\ndependencies = [\n \"numpy\"\n]\n\nauthors = [\n { name=\"First Last\", email=\"initials@dhigroup.com\" },\n]\ndescription = \"Useful library\"\nreadme = \"README.md\"\nrequires-python = \">=3.7\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\n[project.optional-dependencies]\ndev = [\"pytest\",\"flake8\",\"black\",\"sphinx\", \"myst-parser\",\"sphinx-book-theme\"]\ntest= [\"pytest\"]\n\n[project.urls]\n\"Homepage\" = \"https://github.com/DHI/my_library\"\n\"Bug Tracker\" = \"https://github.com/DHI/my_library/issues\"" }, { - "objectID": "02_function_classes.html#classes", - "href": "02_function_classes.html#classes", - "title": "Functions, classes and modules", - "section": "Classes", - "text": "Classes\nclass WeirdToolbox:\n tools = [] # class variable ☹️\n\n\n>>> t1 = 
WeirdToolbox()\n>>> t1.tools.append(\"hammer\")\n>>> t1.tools\n[\"hammer\"]\n\n>>> t2 = WeirdToolbox()\n>>> t2.tools.append(\"screwdriver\")\n>>> t2.tools\n[\"hammer\", \"screwdriver\"]\n\nClass variables are rarely what you want, since they are shared between all instances of the class." + "objectID": "07_packaging.html#versioning", + "href": "07_packaging.html#versioning", + "title": "Distributing your Python package", + "section": "Versioning", + "text": "Versioning\nVersioning your package is important for reproducibility and to avoid breaking changes.\n\n\n\nSemantic versioning use three numbers {major}.{minor}.{patch}, e.g. 1.1.0\n\n\nA new major version indicates breaking changes\nA new minor version indicates new features, without breaking changes\nA new patch version indicates a small change, e.g. a bug fix\nEach of the numbers can be higher than 9, e.g. 1.0.0 is more recent than 0.24.12" }, { - "objectID": "02_function_classes.html#classes-1", - "href": "02_function_classes.html#classes-1", - "title": "Functions, classes and modules", - "section": "Classes", - "text": "Classes\nclass Toolbox:\n def __init__(self):\n self.tools = [] # instance variable 😃\n\n>>> t1 = Toolbox()\n>>> t1.tools.append(\"hammer\")\n>>> t1.tools\n[\"hammer\"]\n\n>>> t2 = Toolbox()\n>>> t2.tools.append(\"screwdriver\")\n>>> t2.tools\n[\"screwdriver\"]\n\nInstance variables are created when the instance is created, and are unique to each instance." 
+ "objectID": "07_packaging.html#version-1.0", + "href": "07_packaging.html#version-1.0", + "title": "Distributing your Python package", + "section": "Version 1.0", + "text": "Version 1.0\n\n\nA version number of 1.0 indicates that the package is ready for production\nThe API is stable, and breaking changes will only be introduced in new major versions\nThe package is well tested, and the documentation is complete\nStart with version 0.1.0 and increase the version number as you add features" }, { - "objectID": "02_function_classes.html#static-methods", - "href": "02_function_classes.html#static-methods", - "title": "Functions, classes and modules", - "section": "Static methods", - "text": "Static methods\nfrom datetime import date\n\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))\n>>> dr.start\ndatetime.date(2020, 1, 1)\n>>> dr.end\ndatetime.date(2020, 1, 31)\n\nHere is an example of useful class, but it is a bit cumbersome to create an instance." + "objectID": "07_packaging.html#breaking-changes", + "href": "07_packaging.html#breaking-changes", + "title": "Distributing your Python package", + "section": "Breaking changes", + "text": "Breaking changes\nWhat is a breaking change?\n\n\nRemoving a function\nChanging the name of a function\nChanging the signature of a function (arguments, types, return value)\n\n\n\nTry to avoid breaking changes, if possible, but if you do, increase the major version number!" 
}, { - "objectID": "02_function_classes.html#static-methods-1", - "href": "02_function_classes.html#static-methods-1", - "title": "Functions, classes and modules", - "section": "Static methods", - "text": "Static methods\nfrom datetime import date\n\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n @staticmethod\n def from_string(date_string):\n start_str, end_str = date_string.split(\"|\")\n start = date.fromisoformat(start_str)\n end = date.fromisoformat(end_str)\n return Interval(start, end)\n\n>>> dr = Interval.from_string(\"2020-01-01|2020-01-31\")\n>>> dr\n<__main__.Interval at 0x7fb99efcfb90>\n\nSince we commonly use ISO formatted dates separated by a pipe, we can add a static method to create an instance from a string. This makes it easier to create an instance." + "objectID": "07_packaging.html#installing-specific-versions", + "href": "07_packaging.html#installing-specific-versions", + "title": "Distributing your Python package", + "section": "Installing specific versions", + "text": "Installing specific versions\n\npip install my_library will install the latest version\npip install my_library==1.0.0 will install version 1.0.0\npip install my_library>=1.0.0 will install version 1.0.0 or higher" }, { - "objectID": "02_function_classes.html#dataclasses", - "href": "02_function_classes.html#dataclasses", - "title": "Functions, classes and modules", - "section": "Dataclasses", - "text": "Dataclasses\nfrom dataclasses import dataclass\n\n@dataclass\nclass Interval:\n start: date\n end: date\n\n @staticmethod\n def from_string(date_string):\n start_str, end_str = date_string.split(\"|\")\n start = date.fromisoformat(start_str)\n end = date.fromisoformat(end_str)\n return Interval(start, end)\n\n>>> dr = Interval.from_string(\"2020-01-01|2020-01-31\")\n>>> dr\nInterval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n\nDataclasses are a new feature in Python 3.7, they are a convenient way to 
create classes with a few attributes. The variables are instance variables, and the class has a constructor that takes the same arguments as the variables." + "objectID": "07_packaging.html#pre-release-versions", + "href": "07_packaging.html#pre-release-versions", + "title": "Distributing your Python package", + "section": "Pre-release versions", + "text": "Pre-release versions\n\n\n\nVersions that are not ready for production\nIndicated by a suffix, e.g. 1.0.0rc1\nWill not be installed by default\nCan be installed with pip install my_library==1.0.0rc1\nListed on PyPI, but not on the search page" }, { - "objectID": "02_function_classes.html#equality", - "href": "02_function_classes.html#equality", - "title": "Functions, classes and modules", - "section": "Equality", - "text": "Equality\nOn a regular class, equality is based on the memory address of the object.\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n>>> dr1 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr2 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr1 == dr2\nFalse\n\nThis is not very useful, since we want to compare the values of the attributes." 
+ "objectID": "07_packaging.html#license", + "href": "07_packaging.html#license", + "title": "Distributing your Python package", + "section": "License", + "text": "License\n\n\nA license is a legal agreement between you and others who use your package\nIf you do not specify a license, others cannot use your package legally\nThe license is specified in the pyproject.toml file\nRead more about licenses on https://choosealicense.com/\nCheck if your package is compatible with the license of the dependencies" }, { - "objectID": "02_function_classes.html#equality-1", - "href": "02_function_classes.html#equality-1", - "title": "Functions, classes and modules", - "section": "Equality", - "text": "Equality\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n def __eq__(self, other):\n return self.start == other.start and self.end == other.end\n\n>>> dr1 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr2 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr1 == dr2\nTrue\n\nWe can override the __eq__ method to compare the values of the attributes." + "objectID": "07_packaging.html#dependencies", + "href": "07_packaging.html#dependencies", + "title": "Distributing your Python package", + "section": "Dependencies", + "text": "Dependencies\n\n\nApplication\nA program that is run by a user\n\ncommand line tool\nscript\nweb application\n\nPin versions to ensure reproducibility, e.g. numpy==1.11.0\n\nLibrary\nA program that is used by another program\n\nPython package\nLow level library (C, Fortran, Rust, …)\n\nMake the requirements as loose as possible, e.g. numpy>=1.11.0\n\n\n\nMake the requirements loose, to avoid conflicts with other packages." 
}, { - "objectID": "02_function_classes.html#data-classes", - "href": "02_function_classes.html#data-classes", - "title": "Functions, classes and modules", - "section": "Data classes", - "text": "Data classes\nfrom dataclasses import dataclass, field\n\n@dataclass\nclass Quantity:\n unit: str = field(compare=True)\n standard_name: field(compare=True)\n name: str = field(compare=False, default=None)\n\n\n>>> t1 = Quantity(name=\"temp\", unit=\"C\", standard_name=\"air_temperature\")\n>>> t2 = Quantity(name=\"temperature\", unit=\"C\", standard_name=\"air_temperature\")\n\n>>> t1 == t2\nTrue\n\n>>> d1 = Quantity(unit=\"m\", standard_name=\"depth\")\n>>> d1 == t2\nFalse" + "objectID": "07_packaging.html#pyproject.toml-1", + "href": "07_packaging.html#pyproject.toml-1", + "title": "Distributing your Python package", + "section": "pyproject.toml", + "text": "pyproject.toml\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[project]\nname = \"my_library\"\nversion = \"0.0.1\"\ndependencies = [\n \"numpy\"\n]\n\nauthors = [\n { name=\"First Last\", email=\"initials@dhigroup.com\" },\n]\ndescription = \"Useful library\"\nreadme = \"README.md\"\nrequires-python = \">=3.7\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\n[project.optional-dependencies]\ndev = [\"pytest\",\"flake8\",\"black\",\"sphinx\", \"myst-parser\",\"sphinx-book-theme\"]\ntest= [\"pytest\"]\n\n[project.urls]\n\"Homepage\" = \"https://github.com/DHI/my_library\"\n\"Bug Tracker\" = \"https://github.com/DHI/my_library/issues\"\n\n\nMandatory dependencies are specified in the dependencies section.\nOptional dependencies are specified in the optional-dependencies section." 
}, { - "objectID": "02_function_classes.html#data-classes-1", - "href": "02_function_classes.html#data-classes-1", - "title": "Functions, classes and modules", - "section": "Data classes", - "text": "Data classes\n\n\nCompact notation of fields with type hints\nEquality based on values of fields\nUseful string represenation by default\nIt is still a regular class" + "objectID": "07_packaging.html#classifiers", + "href": "07_packaging.html#classifiers", + "title": "Distributing your Python package", + "section": "Classifiers", + "text": "Classifiers\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\nClassifiers are used to categorize your package\nLess relevant for internal packages\nOperating system (Windows, Linux, MacOS)\nDevelopment status (Alpha, Beta, Production/Stable)" }, { - "objectID": "02_function_classes.html#modules", - "href": "02_function_classes.html#modules", - "title": "Functions, classes and modules", - "section": "Modules", - "text": "Modules\nModules are files containing Python code (functions, classes, constants) that belong together.\n$tree analytics/\nanalytics/\n├── __init__.py\n├── date.py\n└── tools.py\n\nThe analytics package contains two modules:\n\ntools module\ndate module" + "objectID": "07_packaging.html#packaging-non-python-files", + "href": "07_packaging.html#packaging-non-python-files", + "title": "Distributing your Python package", + "section": "Packaging non-Python files", + "text": "Packaging non-Python files\n\nIncluding non-Python files can be useful for e.g. machine learning models.\nIf you use hatchling, you can include non-Python files in your package.\nhatchling uses .gitignore to determine which files to include." 
}, { - "objectID": "02_function_classes.html#packages", - "href": "02_function_classes.html#packages", - "title": "Functions, classes and modules", - "section": "Packages", - "text": "Packages\n\n\nA package is a directory containing modules\nEach package in Python is a directory which MUST contain a special file called __init__.py\nThe __init__.py can be empty, and it indicates that the directory it contains is a Python package\n__init__.py can also execute initialization code" + "objectID": "07_packaging.html#github-secrets", + "href": "07_packaging.html#github-secrets", + "title": "Distributing your Python package", + "section": "GitHub secrets", + "text": "GitHub secrets\n\nStore sensitive information, e.g. passwords, in your repository.\nSecrets are encrypted, and only visible to you and GitHub Actions.\nAdd secrets in the repository settings.\n\nTo use secrets as environment variables in GitHub Actions, add them to the env section of the workflow:\nenv:\n TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}" }, { - "objectID": "02_function_classes.html#init__.py", - "href": "02_function_classes.html#init__.py", - "title": "Functions, classes and modules", - "section": "__init__.py", - "text": "__init__.py\nExample: mikeio/pfs/__init__.py:\nfrom .pfsdocument import Pfs, PfsDocument\nfrom .pfssection import PfsNonUniqueList, PfsSection\n\ndef read_pfs(filename, encoding=\"cp1252\", unique_keywords=False):\n \"\"\"Read a pfs file for further analysis/manipulation\"\"\"\n \n return PfsDocument(filename, encoding=encoding, unique_keywords=unique_keywords)\n\nThe imports in __init__.py let’s you separate the implementation into multiple files.\n>>> mikeio.pfs.pfssection.PfsSection\n<class 'mikeio.pfs.pfssection.PfsSection'>\n>>> mikeio.pfs.PfsSection\n<class 'mikeio.pfs.pfssection.PfsSection'>\n\nThe PfsSection and PfsDocument are imported from the pfssection.py and pfsdocument.py modules. to the mikeio.pfs namespace." 
+ "objectID": "07_packaging.html#github-actions", + "href": "07_packaging.html#github-actions", + "title": "Distributing your Python package", + "section": "GitHub Actions", + "text": "GitHub Actions\n\n\n.github/workflows/python-package.yml\n\nname: Publish Python Package\non:\n release:\n types: [created]\njobs:\n deploy:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v2\n - name: Set up Python\n uses: actions/setup-python@v2\n with:\n python-version: '3.10'\n - name: Install dependencies\n run: |\n python -m pip install --upgrade pip\n pip install build\n - name: Build package\n run: python -m build\n \n - name: Publish to PyPI\n env:\n TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n run: |\n twine upload dist/*" }, { - "objectID": "02_function_classes.html#how-generic-should-your-code-be", - "href": "02_function_classes.html#how-generic-should-your-code-be", - "title": "Functions, classes and modules", - "section": "How generic should your code be?", - "text": "How generic should your code be?\n\nA good rule of thumb is to make your code as generic as possible, but no more. 🙄\n\n\n\n\n\nTry to anticipate the future, but not too much!" + "objectID": "07_packaging.html#private-pypi-server", + "href": "07_packaging.html#private-pypi-server", + "title": "Distributing your Python package", + "section": "Private PyPI server", + "text": "Private PyPI server\n\nPrivate packages can be hosted on e.g. 
Azure Artifacts or Posit Package Manager.\nThese servers behave like PyPI, and can be used with pip\nAccess policies can be used to control who can install packages.\n\n\nExample:\n$ pip install --extra-index-url https://pkgs.dev.azure.com/dhigroup/_packaging/pond/pypi/simple/ sampling\nLooking in indexes: https://pypi.org/simple, https://pkgs.dev.azure.com/dhigroup/_packaging/pond/pypi/simple/\n...\nSuccessfully installed sampling-0.0.1" }, { - "objectID": "02_function_classes.html#homework-until-next-week", - "href": "02_function_classes.html#homework-until-next-week", - "title": "Functions, classes and modules", - "section": "Homework until next week", - "text": "Homework until next week\n\n\nCreate a repository on GitHub to store your code\nCreate a README.md file describing the purpose of the repository. (course project, …\nWrite a function to calculate the fraction of values within a given range, with the lower bound optional\n\nName, inputs, output, types\n\nCreate a class to represent sensor data, e.g. water levels, temperature measured with a fixed interval\n\nName, attributes, methods?\n\n\n\n\n✨ See you next week! 🚀\n\n\n\nPython package development" + "objectID": "07_packaging.html#installing-a-development-version", + "href": "07_packaging.html#installing-a-development-version", + "title": "Distributing your Python package", + "section": "Installing a development version", + "text": "Installing a development version\n\nInstall latest dev version, e.g. pip install https://github.com/DHI/mikeio/archive/main.zip\nInstall from fix-interp branch, e.g. 
pip install https://github.com/DHI/mikeio/archive/fix-interp.zip" }, { - "objectID": "03_types_protocols.html#types", - "href": "03_types_protocols.html#types", - "title": "Python types, interfaces and protocols", - "section": "Types", - "text": "Types\nC#\nint n = 2;\nString s = \"Hello\";\n\npublic String RepeatedString(String s, int n) {\n return Enumerable.Repeat(s, n).Aggregate((a, b) => a + b);\n}\n\nPython\nn = 2\ns = \"Hello\"\n\ndef repeated_string(s, n):\n return s * n" + "objectID": "07_packaging.html#recap", + "href": "07_packaging.html#recap", + "title": "Distributing your Python package", + "section": "Recap", + "text": "Recap\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nTypes, abstraction, and refactoring\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package" }, { - "objectID": "03_types_protocols.html#types-1", - "href": "03_types_protocols.html#types-1", - "title": "Python types, interfaces and protocols", - "section": "Types", - "text": "Types\n\n\nPython is a dynamically typed language\nTypes are not checked at compile time\nTypes are checked at runtime\n\n\n\nPython with type hints\nn: int = 2\ns: str = \"Hello\"\n\ndef repeated_string(s:str, n:int) -> str:\n return s * n" + "objectID": "07_packaging.html#git-pull-requests-and-code-reviews", + "href": "07_packaging.html#git-pull-requests-and-code-reviews", + "title": "Distributing your Python package", + "section": "Git, Pull Requests, and code reviews", + "text": "Git, Pull Requests, and code reviews" }, { - "objectID": "03_types_protocols.html#abstraction", - "href": "03_types_protocols.html#abstraction", - "title": "Python types, interfaces and protocols", - "section": "Abstraction", - "text": "Abstraction\n\n\nVersion A\ntotal = 0.0\nfor x in values:\n total = total +x\n\nVersion B\ntotal = sum(values)\n\n\n\n\n\nUsing functions, e.g. 
sum() allows us to operate on a higher level of abstraction.\nToo little abstraction will force you to write many lines of boiler-plate code\nToo much abstraction limits the flexibility\n✨Find the right level of abstraction!✨\n\n\n\n\nWhich version is easiest to understand?\nWhich version is easiest to change?" + "objectID": "07_packaging.html#github-flow", + "href": "07_packaging.html#github-flow", + "title": "Distributing your Python package", + "section": "Github flow", + "text": "Github flow\n\n\nCreate a branch\nMake changes\nCreate a pull request\nReview\nMerge" }, { - "objectID": "03_types_protocols.html#collections-abstract-base-classes", - "href": "03_types_protocols.html#collections-abstract-base-classes", - "title": "Python types, interfaces and protocols", - "section": "Collections Abstract Base Classes", - "text": "Collections Abstract Base Classes\n\n\n\n\nclassDiagram\n Container <|-- Collection\n Sized <|-- Collection\n Iterable <|-- Collection\n \n class Container{\n __contains__(self, x)\n }\n\n class Sized{\n __len__(self)\n }\n\n class Iterable{\n __iter__(self)\n }\n\n\n\n\n\n\n\n\nIf a class implements __len__ it is a Sized object.\nIf a class implements __contains__ it is a Container object.\nIf a class implements __iter__ it is a Iterable object." 
+ "objectID": "07_packaging.html#github-best-practices", + "href": "07_packaging.html#github-best-practices", + "title": "Distributing your Python package", + "section": "Github best practices", + "text": "Github best practices\n\nCommit often\nUse descriptive commit messages\nKeep pull requests small and focused\nUse “issues” to track work\nReview code regularly" }, { - "objectID": "03_types_protocols.html#collections-abstract-base-classes-1", - "href": "03_types_protocols.html#collections-abstract-base-classes-1", - "title": "Python types, interfaces and protocols", - "section": "Collections Abstract Base Classes", - "text": "Collections Abstract Base Classes\n>>> a = [1, 2, 3]\n>>> 1 in a\nTrue\n>>> a.__contains__(1)\nTrue\n>>> len(a)\n3\n>>> a.__len__()\n3\n>>> for x in a:\n... v.append(x)\n>>> it = a.__iter__()\n>>> next(it)\n1\n>>> next(it)\n2\n>>> next(it)\n3\n>>> next(it)\nTraceback (most recent call last):\n File \"<stdin>\", line 1, in <module>\nStopIteration" + "objectID": "07_packaging.html#python-functions-classes-and-modules", + "href": "07_packaging.html#python-functions-classes-and-modules", + "title": "Distributing your Python package", + "section": "Python functions, classes, and modules", + "text": "Python functions, classes, and modules" }, { - "objectID": "03_types_protocols.html#collections-abstract-base-classes-2", - "href": "03_types_protocols.html#collections-abstract-base-classes-2", - "title": "Python types, interfaces and protocols", - "section": "Collections Abstract Base Classes", - "text": "Collections Abstract Base Classes\n\n\n\n\n\nclassDiagram\n Container <|-- Collection\n Sized <|-- Collection\n Iterable <|-- Collection\n Collection <|-- Sequence\n Collection <|-- Set\n Sequence <|-- MutableSequence\n Mapping <|-- MutableMapping\n Collection <|-- Mapping\n\n MutableSequence <|-- List\n Sequence <|-- Tuple\n MutableMapping <|-- Dict" + "objectID": "07_packaging.html#functions-as-black-boxes", + "href": 
"07_packaging.html#functions-as-black-boxes", + "title": "Distributing your Python package", + "section": "Functions as black boxes", + "text": "Functions as black boxes\n\n\n\n\nflowchart LR\n A(Input A) --> F[\"Black box\"]\n B(Input B) --> F\n F --> O(Output)\n\n style F fill:#000,color:#fff,stroke:#333,stroke-width:4px\n\n\n\n\n\n\nA function is a black box that takes some input and produces some output.\nThe input and output can be anything, including other functions.\nAs long as the input and output are the same, the function body can be modified." }, { - "objectID": "03_types_protocols.html#pythonic", - "href": "03_types_protocols.html#pythonic", - "title": "Python types, interfaces and protocols", - "section": "Pythonic", - "text": "Pythonic\nIf you want your code to be Pythonic, you have to be familiar with these types and their methods.\nDundermethods:\n\n__getitem__\n__setitem__\n__len__\n__contains__\n…" + "objectID": "07_packaging.html#naming-conventions---general", + "href": "07_packaging.html#naming-conventions---general", + "title": "Distributing your Python package", + "section": "Naming conventions - general", + "text": "Naming conventions - general\n\nUse lowercase characters\nSeparate words with underscores\n\nmodel_name = \"NorthSeaModel\"\nn_epochs = 100\n\ndef my_function():\n pass" }, { - "objectID": "03_types_protocols.html#duck-typing", - "href": "03_types_protocols.html#duck-typing", - "title": "Python types, interfaces and protocols", - "section": "Duck typing", - "text": "Duck typing\n\n\n“If it walks like a duck and quacks like a duck, it’s a duck”\nFrom the perspective of the caller, it doesn’t matter if it is a rubber duck or a real duck.\nThe type of the object is not important, as long as it has the right methods." 
+ "objectID": "07_packaging.html#constants", + "href": "07_packaging.html#constants", + "title": "Distributing your Python package", + "section": "Constants", + "text": "Constants\n\nUse all uppercase characters\n\nGRAVITY = 9.81\n\nAVOGADRO_CONSTANT = 6.02214076e23\n\nSECONDS_IN_A_DAY = 86400\n\nN_LEGS_PER_ANIMAL = {\n \"human\": 2,\n \"dog\": 4,\n \"spider\": 8,\n}" }, { - "objectID": "03_types_protocols.html#lets-revisit-the-date-interval", - "href": "03_types_protocols.html#lets-revisit-the-date-interval", - "title": "Python types, interfaces and protocols", - "section": "Let’s revisit the (date) Interval1", - "text": "Let’s revisit the (date) Interval1\nclass Interval:\n def __init__(self, start, end):\n self.start = start\n self.end = end\n\n def __contains__(self, x):\n return self.start < x < self.end\n\n>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))\n\n>>> date(2020,1,15) in dr\nTrue\n>>> date(1970,1,1) in dr\nFalse\nhttps://martinfowler.com/eaaDev/Range.html" + "objectID": "07_packaging.html#classes", + "href": "07_packaging.html#classes", + "title": "Distributing your Python package", + "section": "Classes", + "text": "Classes\n\nUse CamelCase for the name of the class\nUse lowercase characters for the name of the methods\nSeparate words with underscores\n\nclass RandomClassifier:\n\n def fit(self, X, y):\n self.classes_ = np.unique(y)\n\n def predict(self, X):\n return np.random.choice(self.classes_, size=len(X))\n\n def fit_predict(self, X, y):\n self.fit(X, y)\n return self.predict(X)" }, { - "objectID": "03_types_protocols.html#some-other-interval", - "href": "03_types_protocols.html#some-other-interval", - "title": "Python types, interfaces and protocols", - "section": "Some other interval", - "text": "Some other interval\nclass Interval:\n def __init__(self, start, end):\n self.start = start\n self.end = end\n\n def __contains__(self, x):\n return self.start < x < self.end\n \n>>> interval = Interval(5, 10)\n\n>>> 8 in interval\nTrue\n>>> 
12 in interval\nFalse\n\nAs long as the start, end and x are comparable, the Interval class is a generic class able to handle integers, floats, dates, datetimes, strings …" + "objectID": "07_packaging.html#dataclasses", + "href": "07_packaging.html#dataclasses", + "title": "Distributing your Python package", + "section": "Dataclasses", + "text": "Dataclasses\nimport datetime\nfrom dataclasses import dataclass\n\n\n@dataclass\nclass Interval:\n start: date\n end: date\n\n>>> dr1 = Interval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr1\nInterval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr2 = Interval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr1 == dr2\nTrue" }, { - "objectID": "03_types_protocols.html#time-for-a-discussion", - "href": "03_types_protocols.html#time-for-a-discussion", - "title": "Python types, interfaces and protocols", - "section": "Time for a discussion", - "text": "Time for a discussion\nPre-work for this Module\n\nRead chapters\n\nAbstraction and encapsulation\nDesigning for high performance\n\nin Hillard, 2020\nDo you need to understand every single line of code you use, or is it sometimes enough to treat functions as a black box?\nConsider the implications of storing data in a list or in a dictionary." 
+ "objectID": "07_packaging.html#types-abstraction-and-refactoring", + "href": "07_packaging.html#types-abstraction-and-refactoring", + "title": "Distributing your Python package", + "section": "Types, abstraction, and refactoring", + "text": "Types, abstraction, and refactoring" }, { - "objectID": "03_types_protocols.html#time-for-a-discussion-1", - "href": "03_types_protocols.html#time-for-a-discussion-1", - "title": "Python types, interfaces and protocols", - "section": "Time for a discussion", - "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\nChapter 3: Abstraction and encapsulation\nWhat are some pros/cons of treating functions as black boxes?\n\nChapter 4: Designing for high performance\nWhat is the performance impact of using a list instead of a dictionary?\n\n\nAfter break out session:\nOne person from each team briefly presents their discussion outcomes" + "objectID": "07_packaging.html#pythonic", + "href": "07_packaging.html#pythonic", + "title": "Distributing your Python package", + "section": "Pythonic", + "text": "Pythonic\nIf you want your code to be Pythonic, you have to be familiar with these types and their methods.\nDundermethods:\n\n__getitem__\n__setitem__\n__len__\n__contains__\n…" }, { - "objectID": "03_types_protocols.html#protocols", - "href": "03_types_protocols.html#protocols", - "title": "Python types, interfaces and protocols", - "section": "Protocols", - "text": "Protocols\n\n\nProtocols were introduced in Python 3.8\nProtocols are a way to define a set of methods that a class must implement\nProtocols are not checked at runtime\nProtocols can be used for static type checking (mypy)\nProtocols are a hint to the developer" + "objectID": "07_packaging.html#duck-typing", + "href": "07_packaging.html#duck-typing", + "title": "Distributing your Python package", + "section": "Duck typing", + "text": "Duck typing\n\n“If it walks like a duck and quacks like a duck, it’s a duck”\nFrom the perspective of the caller, it 
doesn’t matter if it is a rubber duck or a real duck.\nThe type of the object is not important, as long as it has the right methods." }, { - "objectID": "03_types_protocols.html#protocols-1", - "href": "03_types_protocols.html#protocols-1", - "title": "Python types, interfaces and protocols", - "section": "Protocols", - "text": "Protocols\nfrom dataclasses import dataclass\nfrom typing import Protocol, List\nimport matplotlib.pyplot as plt\n\n\n@dataclass\nclass Station:\n name: str\n longitude: float\n latitude: float\n\n\nclass StationRepository(Protocol):\n def __getitem__(self, key: str) -> Station:\n ...\n\n\ndef plot_stations(station_ids: List[int], repo: StationRepository) -> None:\n for id in station_ids:\n station = repo[id]\n plt.plot(station.longitude, station.latitude, \"o\")\n plt.text(station.longitude, station.latitude, station.name)" + "objectID": "07_packaging.html#testing-and-auto-formatting", + "href": "07_packaging.html#testing-and-auto-formatting", + "title": "Distributing your Python package", + "section": "Testing and auto-formatting", + "text": "Testing and auto-formatting" }, { - "objectID": "03_types_protocols.html#protocols-2", - "href": "03_types_protocols.html#protocols-2", - "title": "Python types, interfaces and protocols", - "section": "Protocols", - "text": "Protocols\nclass DictStationRepository: # useful for testing (Module 4)\n def __getitem__(self, key: int) -> Station:\n stations = {\n 42: Station(\"Antwerp\", 4.42, 51.22),\n 365: Station(\"Brussels\", 4.35, 50.85),\n 182: Station(\"Ghent\", 3.72, 51.05),\n }\n return stations[key]\n\n\nclass FileStationRepository:\n def __getitem__(self, key: int) -> Station:\n df = pd.read_csv(\"stations.csv\", index_col=\"id\")\n row = df.loc[key]\n return Station(row[\"name\"], key, row[\"longitude\"], row[\"latitude\"])\n\n\nclass DatabaseStationRepository:\n def __getitem__(self, key: int) -> Station:\n # read from database\n station = read_from_db(key)\n return station" + "objectID": 
"07_packaging.html#unit-testing", + "href": "07_packaging.html#unit-testing", + "title": "Distributing your Python package", + "section": "Unit testing", + "text": "Unit testing\n\n\n\n\n\n\nDefinition “Unit”\n\n\n\nA small, fundamental piece of code.\nExecuted in isolation with appropriate inputs.\n\n\n\n\n\nA function is typically considered a “unit”\nLines of code within functions are smaller (can’t be isolated)\nClasses are considered bigger (but can be treated as units)" }, { - "objectID": "03_types_protocols.html#protocols-3", - "href": "03_types_protocols.html#protocols-3", - "title": "Python types, interfaces and protocols", - "section": "Protocols", - "text": "Protocols\nrepo = DictStationRepository()\n# repo = FileStationRepository()\n\nplot_stations([42, 365], repo=repo)\n\nProtocols allows to work with abstract types, without having to know the implementation details.\nThis is useful for testing, but allows for more flexible code." + "objectID": "07_packaging.html#a-good-unit-test", + "href": "07_packaging.html#a-good-unit-test", + "title": "Distributing your Python package", + "section": "A good unit test", + "text": "A good unit test\n\nFully automated\nHas full control over all the pieces running (“fake” external dependencies)\nCan be run in any order\nRuns in memory (no DB or file access, for example)\nConsistently returns the same result (no random numbers)\nRuns fast\nTests a single logical concept in the system\nReadable\nMaintainable\nTrustworthy" }, { - "objectID": "03_types_protocols.html#postels-law", - "href": "03_types_protocols.html#postels-law", - "title": "Python types, interfaces and protocols", - "section": "Postel’s law", - "text": "Postel’s law\na.k.a. 
the Robustness principle of software design\n\nBe liberal in what you accept\nBe conservative in what you send\n\n\ndef process(number: Union[int,str,float]) -> int:\n # make sure number is an int from now on\n number = int(number)\n\n result = number * 2\n return result" + "objectID": "07_packaging.html#thank-you", + "href": "07_packaging.html#thank-you", + "title": "Distributing your Python package", + "section": "Thank you!", + "text": "Thank you!\n\n\n\nPython package development" }, { - "objectID": "03_types_protocols.html#section", - "href": "03_types_protocols.html#section", - "title": "Python types, interfaces and protocols", - "section": "", - "text": "The consumers of your package (future self), will be grateful if you are not overly restricitive in what types you accept as input." + "objectID": "04_testing.html#testing", + "href": "04_testing.html#testing", + "title": "Testing, linting and formatting", + "section": "Testing", + "text": "Testing\nVerify code is working as expected.\nSimplest way to test is to run code and check output.\n\nAutomated testing checks output automatically.\nCode changes can break other parts of code.\nAutomatic testing verifies code is still working." 
}, { - "objectID": "03_types_protocols.html#refactoring", - "href": "03_types_protocols.html#refactoring", - "title": "Python types, interfaces and protocols", - "section": "Refactoring", - "text": "Refactoring\n\n\nRefactoring is a way to improve the design of existing code\nChanging a software system in such a way that it does not alter the external behavior of the code, yet improves its internal structure\nRefactoring is a way to make code more readable and maintainable\nHousekeeping" + "objectID": "04_testing.html#testing-workflow", + "href": "04_testing.html#testing-workflow", + "title": "Testing, linting and formatting", + "section": "Testing workflow", + "text": "Testing workflow\n\n\n\n\nflowchart TD\n A[Prepare inputs]\n B[Describe expected output]\n C[Obtain actual output]\n D[Compare actual and\\n expected output]\n\n A --> B --> C --> D" }, { - "objectID": "03_types_protocols.html#common-refactoring-techniques", - "href": "03_types_protocols.html#common-refactoring-techniques", - "title": "Python types, interfaces and protocols", - "section": "Common refactoring techniques:", - "text": "Common refactoring techniques:\n\nExtract method\nExtract variable\nRename method\nRename variable\nRename class\nInline method\nInline variable\nInline class" + "objectID": "04_testing.html#unit-testing", + "href": "04_testing.html#unit-testing", + "title": "Testing, linting and formatting", + "section": "Unit testing", + "text": "Unit testing\n\n\n\n\n\n\nDefinition “Unit”\n\n\n\nA small, fundamental piece of code.\nExecuted in isolation with appropriate inputs.\n\n\n\n\n\n\nA function is typically considered a “unit”\nLines of code within functions are smaller (can’t be isolated)\nClasses are considered bigger (but can be treated as units)" }, { - "objectID": "03_types_protocols.html#rename-variable", - "href": "03_types_protocols.html#rename-variable", - "title": "Python types, interfaces and protocols", - "section": "Rename variable", - "text": "Rename 
variable\nBefore\nn = 0\nfor v in y:\n if v < 0:\n n = n + 1\n\nAfter\nFREEZING_POINT = 0.0\nn_freezing_days = 0\nfor temp in daily_max_temperatures:\n if temp < FREEZING_POINT:\n n_freezing_days = n_freezing_days + 1" + "objectID": "04_testing.html#a-good-unit-test", + "href": "04_testing.html#a-good-unit-test", + "title": "Testing, linting and formatting", + "section": "A good unit test", + "text": "A good unit test\n\n\n\n\nFully automated (next week)\nHas full control over all the pieces running (“fake” external dependencies)\nCan be run in any order\nRuns in memory (no DB or file access, for example)\nConsistently returns the same result (no random numbers)\nRuns fast\nTests a single logical concept in the system\nReadable\nMaintainable\nTrustworthy" }, { - "objectID": "03_types_protocols.html#extract-variable", - "href": "03_types_protocols.html#extract-variable", - "title": "Python types, interfaces and protocols", - "section": "Extract variable", - "text": "Extract variable\nBefore\ndef predict(x):\n return min(0.0, 0.5 + 2.0 * min(0,x) + (random.random() - 0.5) / 10.0)\n\nAfter\ndef predict(x):\n scale = 10.0\n error = (random.random() - 0.5) / scale)\n a = 0.5\n b = 2.0 \n draft = a + b * x + error\n return min(0.0, draft)" + "objectID": "04_testing.html#example", + "href": "04_testing.html#example", + "title": "Testing, linting and formatting", + "section": "Example", + "text": "Example\n\nget a timeseries of water levels\nfind the maximum water level each year\ncreate a summary report for the subset of data" }, { - "objectID": "03_types_protocols.html#extract-method", - "href": "03_types_protocols.html#extract-method", - "title": "Python types, interfaces and protocols", - "section": "Extract method", - "text": "Extract method\ndef error(scale):\n return (random.random() - 0.5) / scale)\n\ndef linear_model(x, *, a=0.0, b=1.0):\n return a + b * x\n\ndef clip(x, *, min_value=0.0):\n return min(min_value, x)\n\ndef predict(x): \n draft = linear_model(x, 
a=0.5, b=2.0) + error(scale=10.0)\n return clip(draft, min_value=0.)" + "objectID": "04_testing.html#integration-testing", + "href": "04_testing.html#integration-testing", + "title": "Testing, linting and formatting", + "section": "Integration testing", + "text": "Integration testing\ndef test_integration():\n wl = get_water_level(time=\"2019-01-01\", location=\"Aarhus\")\n max_wls = get_max_water_level(wl, freq=\"Y\")\n report = summary_report(max_wls)\n\n assert report.title == \"Summary report\"\n assert report.text == \"The maximum water level in 2021 was 3.0 m\"" }, { - "objectID": "03_types_protocols.html#inline-method", - "href": "03_types_protocols.html#inline-method", - "title": "Python types, interfaces and protocols", - "section": "Inline method", - "text": "Inline method\nOpposite of extract mehtod.\ndef predict(x): \n draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)\n return min(0.0, x)" - }, + "objectID": "04_testing.html#testing-in-vs-code", + "href": "04_testing.html#testing-in-vs-code", + "title": "Testing, linting and formatting", + "section": "Testing in VS Code", + "text": "Testing in VS Code" + }, { - "objectID": "03_types_protocols.html#composed-method", - "href": "03_types_protocols.html#composed-method", - "title": "Python types, interfaces and protocols", - "section": "Composed method", - "text": "Composed method\nBreak up a long method into smaller methods." 
+ "objectID": "04_testing.html#fixtures", + "href": "04_testing.html#fixtures", + "title": "Testing, linting and formatting", + "section": "Fixtures", + "text": "Fixtures\n\n\nA piece of code that is used by multiple tests\nProvide data or services to tests\nDefined with @pytest.fixture\nSet up test environment\nPass fixtures as test arguments" }, { - "objectID": "03_types_protocols.html#composed-method-1", - "href": "03_types_protocols.html#composed-method-1", - "title": "Python types, interfaces and protocols", - "section": "Composed method1", - "text": "Composed method1\n\nDivide your program into methods that perform one identifiable task\nKeep all of the operations in a method at the same level of abstraction.\nThis will naturally result in programs with many small methods, each a few lines long.\nWhen you use Extract method a bunch of times on a method the original method becomes a Composed method.\n\nhttps://wiki.c2.com/?ComposedMethod" + "objectID": "04_testing.html#fixture-example", + "href": "04_testing.html#fixture-example", + "title": "Testing, linting and formatting", + "section": "Fixture example", + "text": "Fixture example\n@pytest.fixture\ndef water_level():\n return TimeSeries([1.0, .., 3.0], start = \"2019-01-01\")\n\ndef test_get_max_water_level(water_level):\n max_wls = get_max_water_level(water_level, freq=\"Y\")\n \n assert len(max_wls) == 1\n assert max_wls[0] == 3.0" }, { - "objectID": "05_dependencies_ci.html#section", - "href": "05_dependencies_ci.html#section", - "title": "Dependencies and Continuous Integration", - "section": "", - "text": "Application\nA program that is run by a user\n\ncommand line tool\nscript\nweb application\n\nPin versions to ensure reproducibility, e.g. numpy==1.11.0\n\nLibrary\nA program that is used by another program\n\nPython package\nLow level library (C, Fortran, Rust, …)\n\nMake the requirements as loose as possible, e.g. 
numpy>=1.11.0\n\n\n\nMake the requirements loose, to avoid conflicts with other packages." + "objectID": "04_testing.html#test-coverage", + "href": "04_testing.html#test-coverage", + "title": "Testing, linting and formatting", + "section": "Test coverage", + "text": "Test coverage\n\n\nA measure of how much of your code is tested\nA good test suite should cover all the code\nInstall pytest-cov\nRun tests with coverage report\n\npytest --cov=myproj\n\nUse coverage report to identify untested code" }, { - "objectID": "05_dependencies_ci.html#dependency-management", - "href": "05_dependencies_ci.html#dependency-management", - "title": "Dependencies and Continuous Integration", - "section": "Dependency management", - "text": "Dependency management\nExample of pinning versions:\n\n\nrequirements.txt\n\nnumpy==1.11.0\nscipy==0.17.0\nmatplotlib==1.5.1\n\n\nOr using a range of versions:\n\n\nrequirements.txt\n\nnumpy>=1.11.0\nscipy>=0.17.0\nmatplotlib>=1.5.1,<=2.0.0\n\n\n\nInstall dependencies:\n$ pip install -r requirements.txt\n\nA common way to declare dependencies is to use a requirements.txt file." + "objectID": "04_testing.html#test-coverage-report", + "href": "04_testing.html#test-coverage-report", + "title": "Testing, linting and formatting", + "section": "Test coverage report", + "text": "Test coverage report\npytest --cov=myproj tests/\n-------------------- coverage: ... 
---------------------\nName Stmts Miss Cover\n----------------------------------------\nmyproj/__init__ 2 0 100%\nmyproj/myproj 257 13 94%\nmyproj/feature4286 94 7 92%\n----------------------------------------\nTOTAL 353 20 94%" }, { - "objectID": "05_dependencies_ci.html#creating-an-installable-package", - "href": "05_dependencies_ci.html#creating-an-installable-package", - "title": "Dependencies and Continuous Integration", - "section": "Creating an installable package", - "text": "Creating an installable package" + "objectID": "04_testing.html#testing-advice", + "href": "04_testing.html#testing-advice", + "title": "Testing, linting and formatting", + "section": "Testing advice", + "text": "Testing advice\n\n\n\n\n\n\nTest edge cases\n\n\n\nempty lists\nlists with a single element\nempty strings\nempty dictionaries\nNone\nnp.nan" }, { - "objectID": "05_dependencies_ci.html#setup.py-vs-pyproject.toml", - "href": "05_dependencies_ci.html#setup.py-vs-pyproject.toml", - "title": "Dependencies and Continuous Integration", - "section": "setup.py vs pyproject.toml", - "text": "setup.py vs pyproject.toml\nsetup.py\n\nTraditional approach to defining package configuration and dependencies.\nDefines metadata, dependencies, and entry points in a Python script.\nUses setuptools to generate packages and install the package.\n\n\npyproject.toml\n\nModern approach to defining package configuration and dependencies.\nDefines metadata, dependencies, build tools, and packaging config in a TOML file.\nUses poetry or hatchling to generate packages and install the package." 
+ "objectID": "04_testing.html#tests-act-as-specification", + "href": "04_testing.html#tests-act-as-specification", + "title": "Testing, linting and formatting", + "section": "Tests act as specification", + "text": "Tests act as specification\ndef test_operable_period_can_be_missing():\n\n assert is_operable(height=1.0, period=None)\n assert is_operable(height=1.0, period=np.nan)\n assert is_operable(height=1.0)\n assert not is_operable(height=11.0)\n\ndef test_height_can_not_be_missing():\n\n with pytest.raises(ValueError) as excinfo:\n is_operable(height=None)\n is_operable(height=np.nan)\n \n assert \"height\" in str(excinfo.value)" }, { - "objectID": "05_dependencies_ci.html#install-with-optional-dependencies", - "href": "05_dependencies_ci.html#install-with-optional-dependencies", - "title": "Dependencies and Continuous Integration", - "section": "Install with optional dependencies", - "text": "Install with optional dependencies\n\n\npyproject.toml\n\n[project.optional-dependencies]\ndev = [\"pytest\",\n \"black==22.3.0\",\n \"sphinx\",\n \"sphinx-rtd-theme\",\n \"myst-parser\",\n ]\n\ntest = [\"pytest\", \"pytest-cov\"]\n\n \n$ pip install mini[test]" + "objectID": "04_testing.html#test-driven-development", + "href": "04_testing.html#test-driven-development", + "title": "Testing, linting and formatting", + "section": "Test driven development", + "text": "Test driven development\n\n\nWrite a test that fails ❌\nWrite the code to make the test pass ✅\nRefactor the code ⚒️\n\n\n\nThe benefit of this approach is that you are forced to think about the expected behaviour of your code before you write it.\nIt is also too easy to write a test that passes without actually testing the code." 
}, { - "objectID": "05_dependencies_ci.html#creating-an-installable-package-1", - "href": "05_dependencies_ci.html#creating-an-installable-package-1", - "title": "Dependencies and Continuous Integration", - "section": "Creating an installable package", - "text": "Creating an installable package\nInstall package in editable mode:\n$ pip install -e .\n\nStart a Python session:\n>>> import mini\n>>> mini.foo()\n42\n\n\nRun tests:\n$ pytest\n...\n\ntests/test_foo.py . [100%]\n\n=============== 1 passed in 0.01s ===============" + "objectID": "04_testing.html#section", + "href": "04_testing.html#section", + "title": "Testing, linting and formatting", + "section": "", + "text": "and now for something completely different…" }, { - "objectID": "05_dependencies_ci.html#virtual-environments", - "href": "05_dependencies_ci.html#virtual-environments", - "title": "Dependencies and Continuous Integration", - "section": "Virtual environments", - "text": "Virtual environments\n\n\nCreates a clean environment for each project\nAllows different versions of a package to coexist on your machine\nCan be used to create a reproducible environment for a project\nTo achieve complete isolation, use Docker containers (not covered in this course)" + "objectID": "04_testing.html#the-zen-of-python", + "href": "04_testing.html#the-zen-of-python", + "title": "Testing, linting and formatting", + "section": "The Zen of Python", + "text": "The Zen of Python\nBeautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\n\n…\nErrors should never pass silently.\nUnless explicitly silenced.\n…" }, { - "objectID": "05_dependencies_ci.html#virtual-environments-1", - "href": "05_dependencies_ci.html#virtual-environments-1", - "title": "Dependencies and Continuous Integration", - "section": "Virtual environments", - "text": "Virtual environments\n$ which 
python\n/usr/bin/python\n$ python -m venv venv\n$ source venv/bin/activate # for 🐧 or venv\\Scripts\\activate.bat 🪟\n(venv)$ which python\n/home/user/src/myproj/venv/bin/python\n(venv)$ pip install -r requirements.txt\n\n\nBack in the days, when disk space was limited, it was a good idea to have a separate environment for each project.\nToday, disk space is cheap, and it is a good idea to have a separate environment for each project." + "objectID": "04_testing.html#exceptions", + "href": "04_testing.html#exceptions", + "title": "Testing, linting and formatting", + "section": "Exceptions", + "text": "Exceptions\n\n\nExceptions are a way to handle errors in your code.\nRaising an exception can prevent propagating bad values.\nExceptions are communication between the programmer and the user.\nThere are many built-in exceptions in Python\n\nIndexError\nKeyError\nValueError\nFileNotFoundError\n\nYou can also create your own custom exceptions, e.g. ModelInitialistionError, MissingLicenseError?" 
}, { - "objectID": "05_dependencies_ci.html#condamamba-environments", - "href": "05_dependencies_ci.html#condamamba-environments", - "title": "Dependencies and Continuous Integration", - "section": "Conda/mamba environments", - "text": "Conda/mamba environments\nConda/mamba is a package manager that can be used to create virtual environments.\n$ where python\nC:\\Users\\JAN\\AppData\\Local\\miniforge3\\python.exe\n$ conda create -n myproj -f requirements.txt\n$ conda activate myproj\n(myproj)$ where python\nC:\\Users\\JAN\\AppData\\Local\\miniforge3\\envs\\myproj\\python.exe" + "objectID": "04_testing.html#example-1", + "href": "04_testing.html#example-1", + "title": "Testing, linting and formatting", + "section": "Example", + "text": "Example\n\n\nsrc/ops.py\n\ndef is_operable(height:float, period:float) -> bool:\n if height < 0.0:\n raise ValueError(f\"Supplied value of {height=} is unphysical.\")\n\n>>> is_operable(height=-1.0, period=4.0)\n\nTraceback (most recent call last):\n ...\nValueError: Supplied value of height=-1.0 is unphysical.\n\n\nIt is better to raise an exception (that can terminate the program), than to propagate a bad value." }, { - "objectID": "05_dependencies_ci.html#branch-naming-convention", - "href": "05_dependencies_ci.html#branch-naming-convention", - "title": "Dependencies and Continuous Integration", - "section": "Branch naming convention", - "text": "Branch naming convention\nThere doesn’t exist a clear naming conventions.\n\n\nmain is the main branch🙄, this was previously named master.\nFeature branches uses lowercase separated with dashes, e.g. interpolation-options\nBranches related to fixing issues, start with the id of the issue, e.g. 42-fix-missing-value-handling\nFind a naming convention that works for you and your team." 
+ "objectID": "04_testing.html#warnings", + "href": "04_testing.html#warnings", + "title": "Testing, linting and formatting", + "section": "Warnings", + "text": "Warnings\nWarnings are a way to alert users of your code to potential issues or usage errors without actually halting the program’s execution.\n\n\nsrc/ops.py\n\nimport warnings\nwarnings.warn(\"This is a warning\")" }, { - "objectID": "05_dependencies_ci.html#time-for-a-discussion", - "href": "05_dependencies_ci.html#time-for-a-discussion", - "title": "Dependencies and Continuous Integration", - "section": "Time for a discussion", - "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\nChapter 6: Separations of concerns in practice\nDiscuss what belongs in each layer of the application.\n\nPersistence layer\nBusiness logic layer\nPresentation layer\n\nShould your library have these three layers? If not, which layer is the focus of your library?" + "objectID": "04_testing.html#how-to-test-exceptions", + "href": "04_testing.html#how-to-test-exceptions", + "title": "Testing, linting and formatting", + "section": "How to test exceptions", + "text": "How to test exceptions\n\n\ntests/test_ops.py\n\nimport pytest\nfrom ops import is_operable\n\ndef test_negative_heights_are_not_valid():\n with pytest.raises(ValueError):\n is_operable(height=-1.0, period=4.0)\n\nThe same can be done with warnings." 
}, { - "objectID": "05_dependencies_ci.html#continuous-integration", - "href": "05_dependencies_ci.html#continuous-integration", - "title": "Dependencies and Continuous Integration", - "section": "Continuous Integration", - "text": "Continuous Integration\nRunning tests on every commit in a well defined environment ensures that the code is working as expected.\nIt solves the “it works on my machine” problem.\nExecuting code on a remote server is a good way to ensure that the code is working as expected.\nTwo main approaches are:\n\nGitHub Actions\nAzure Pipelines\n\n\nGitHub actions was forked from Azure Pipelines and runs on the same type of infrastructure, thus are very similar technologies." + "objectID": "04_testing.html#linting", + "href": "04_testing.html#linting", + "title": "Testing, linting and formatting", + "section": "Linting", + "text": "Linting\nA way to check your code for common errors and style issues.\nruff is a new tool for linting Python code.\n\nsyntax errors\nunused imports\nunused variables\nundefined names\ncode style (e.g. 
line length, indentation, whitespace, etc.)" }, { - "objectID": "05_dependencies_ci.html#github-actions", - "href": "05_dependencies_ci.html#github-actions", - "title": "Dependencies and Continuous Integration", - "section": "GitHub Actions", - "text": "GitHub Actions\n\n\n\nWorkflow are stored in the .github/workflows folder.\nWorkflow is described in a YAML file.\nYAML is whitespace sensitive (like Python).\nYAML can contain lists, dictionaries and strings, and can be nested.\n\n\n$ tree mikeio/.github/\nmikeio/.github/\n└── workflows\n ├── docs.yml\n ├── downstream_test.yml\n ├── full_test.yml\n ├── notebooks_test.yml\n ├── perf_test.yml\n ├── python-publish.yml\n └── quick_test.yml" + "objectID": "04_testing.html#linting-with-ruff", + "href": "04_testing.html#linting-with-ruff", + "title": "Testing, linting and formatting", + "section": "Linting with ruff", + "text": "Linting with ruff\n\n\nexamples/04_testing/process.py\n\nimport requests\nimport scipy\n\ndef preprocess(x, y, xout):\n\n x = x[~np.isnan(x)] \n method = \"cubic\"\n # interpolate missing values with cubic spline\n return scipy.interpolate.interp1d(x, y)(xout)\n\nRun ruff:\n$ ruff process.py\nprocess.py:1:8: F401 [*] `requests` imported but unused\nprocess.py:6:12: F821 Undefined name `np`\nprocess.py:7:5: F841 [*] Local variable `method` is assigned to but never used\nFound 3 errors.\n[*] 2 potentially fixable with the --fix option.\n\n\nLinting is a fast way to find common errors.\nUnused imports are confusing.\nUnused and undefined variables are usually a typo or a mistake. Fixing them can prevent bugs." 
}, { - "objectID": "05_dependencies_ci.html#benefits-of-ci", - "href": "05_dependencies_ci.html#benefits-of-ci", - "title": "Dependencies and Continuous Integration", - "section": "Benefits of CI", - "text": "Benefits of CI\n\n\nRun tests on every commit\nTest on different operating systems\nTest on different Python versions\nCreate API documentation (next week)\nPublish package to PyPI or similar package repository (two weeks from now)" + "objectID": "04_testing.html#formatting", + "href": "04_testing.html#formatting", + "title": "Testing, linting and formatting", + "section": "Formatting", + "text": "Formatting\n\n\nFormatting code for readability and maintainability is essential.\nblack is an opinionated automatic code formatter for Python.\nIt enforces its own rules for formatting, which are not configurable.\nHaving a unified style makes code changes easier to understand and collaborate on." }, { - "objectID": "05_dependencies_ci.html#triggers", - "href": "05_dependencies_ci.html#triggers", - "title": "Dependencies and Continuous Integration", - "section": "Triggers", - "text": "Triggers\n\npush and pull_request are the most common triggers\nschedule can be used to run the workflow on a schedule\nworkflow_dispatch can be used to trigger the workflow manually\n\non:\n push:\n branches: [ main ]\n pull_request:\n branches: [ main ]\n schedule:\n - cron: '0 0 * * 0'\n workflow_dispatch:" + "objectID": "04_testing.html#running-black", + "href": "04_testing.html#running-black", + "title": "Testing, linting and formatting", + "section": "Running Black", + "text": "Running Black\n$ black .\nreformatted data_utils.py\nreformatted dfsu/__init__.py\nreformatted dataarray.py\nreformatted dataset.py\nreformatted spatial/geometry.py\nreformatted pfs/pfssection.py\n\nAll done! ✨ 🍰 ✨\n6 files reformatted, 27 files left unchanged." 
}, { - "objectID": "05_dependencies_ci.html#jobs", - "href": "05_dependencies_ci.html#jobs", - "title": "Dependencies and Continuous Integration", - "section": "Jobs", - "text": "Jobs\n\nOperating system\nPython version\n…\n\n{.yaml\"} ... jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, windows-latest] python-version: [3.8, 3.9, \"3.10\",\"3.11\"] ..." + "objectID": "04_testing.html#running-black-1", + "href": "04_testing.html#running-black-1", + "title": "Testing, linting and formatting", + "section": "Running Black", + "text": "Running Black\nVisual Studio Code can be configured to run black automatically when saving a file using the Black extension." }, { - "objectID": "05_dependencies_ci.html#github-releases", - "href": "05_dependencies_ci.html#github-releases", - "title": "Dependencies and Continuous Integration", - "section": "GitHub Releases", - "text": "GitHub Releases\n\n\n\nGitHub releases are a way to publish software releases.\nYou can upload files, write release notes and tag the release.\nAs a minimum, the release will contain the source code at the time of the release.\nCreating a release can trigger other workflows, e.g. publishing a package to PyPI.\n\n\n\n\n\nhttps://github.com/pydata/xarray/releases/tag/v2022.12.0\n\n\n\nPython package development" + "objectID": "04_testing.html#profiling", + "href": "04_testing.html#profiling", + "title": "Testing, linting and formatting", + "section": "Profiling", + "text": "Profiling\n\n\nProfiling is a way to measure the performance of your code.\nIt can help you identify bottlenecks in your code.\nYour intuition about what is slow is often wrong.\nThe line_profiler package reports the time spent on each line of code.\nIt can be run inside a notebook using the lprun magic command." 
+ }, + { + "objectID": "04_testing.html#profiling---example-code", + "href": "04_testing.html#profiling---example-code", + "title": "Testing, linting and formatting", + "section": "Profiling - example code", + "text": "Profiling - example code\nimport numpy as np\n\ndef top_neighbors(points, radius=\"0.1\"):\n \"\"\"Don't use this function, it's only purpose is to be profiled.\"\"\"\n n = len(points)\n idx = np.array([int(x) for x in str.split(\"0 \"* n)])\n\n for i in range(n):\n for j in range(n):\n if i != j:\n d = np.sqrt(np.sum((points[i] - points[j])**2))\n if d < float(radius): \n idx[i] += 1\n for i in range(n):\n for j in range(n - i - 1):\n if idx[j] < idx[j + 1]:\n idx[j], idx[j + 1] = idx[j + 1], idx[j]\n points[j], points[j + 1] = points[j + 1], points[j]\n return points\n\ndef main():\n points = np.random.rand(1000, 2)\n top = top_neighbors(points)" + }, + { + "objectID": "04_testing.html#profiling---output", + "href": "04_testing.html#profiling---output", + "title": "Testing, linting and formatting", + "section": "Profiling - output", + "text": "Profiling - output\nInvoking the jupyter magic command lprun with:\n\nfunction to profile - top_neighbors\ncode to run - main()\n\n%lprun -f top_neighbors main()\n\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 3 def top_neighbors(points, radius=\"0.1\"):\n 4 \"\"\"Don't use this function, it's only purpose is to be profiled.\"\"\"\n 5 1 2800.0 2800.0 0.0 n = len(points)\n 6 1 353300.0 353300.0 0.0 idx = np.array([int(x) for x in str.split(\"0 \"* n)])\n 7 \n 8 1001 345100.0 344.8 0.0 for i in range(n):\n 9 1001000 378191701.0 377.8 2.2 for j in range(n):\n 10 1000000 328387205.0 328.4 1.9 if i != j:\n 11 999000 1e+10 14473.0 83.8 d = np.sqrt(np.sum((points[i] - points[j])**2))\n 12 999000 933778605.0 934.7 5.4 if d < float(radius): \n 13 28952 57010001.0 1969.1 0.3 idx[i] += 1\n 14 1001 367100.0 366.7 0.0 for i in range(n):\n 15 500500 
144295203.0 288.3 0.8 for j in range(n - i - 1):\n 16 499500 302166901.0 604.9 1.8 if idx[j] < idx[j + 1]:\n 17 240227 212070500.0 882.8 1.2 idx[j], idx[j + 1] = idx[j + 1], idx[j]\n 18 240227 437538803.0 1821.4 2.5 points[j], points[j + 1] = points[j + 1], points[j]\n 19 1 500.0 500.0 0.0 return points\n\n\n\nPython package development" }, { "objectID": "06_documentation.html#why-document-your-code", @@ -700,577 +728,598 @@ "text": "“Private” website\n\nA GitHub repository can be made private\nThe website is still publicly available\nIn order to “hide” it from search engines, add a robots.txt file to the root of the website\nThis is not a secure way to hide a website, but it is a simple way to hide it from search engines.\n\n\n\nrobots.txt\n\nUser-agent: *\nDisallow: /\n\n\n\n\nPython package development" }, { - "objectID": "04_testing.html#testing", - "href": "04_testing.html#testing", - "title": "Testing, linting and formatting", - "section": "Testing", - "text": "Testing\nVerify code is working as expected.\nSimplest way to test is to run code and check output.\n\nAutomated testing checks output automatically.\nCode changes can break other parts of code.\nAutomatic testing verifies code is still working." 
- }, - { - "objectID": "04_testing.html#testing-workflow", - "href": "04_testing.html#testing-workflow", - "title": "Testing, linting and formatting", - "section": "Testing workflow", - "text": "Testing workflow\n\n\n\n\nflowchart TD\n A[Prepare inputs]\n B[Describe expected output]\n C[Obtain actual output]\n D[Compare actual and\\n expected output]\n\n A --> B --> C --> D" + "objectID": "course_structure.html", + "href": "course_structure.html", + "title": "Python package development", + "section": "", + "text": "graph TD\n\n M1(Git, Pull Requests, and code reviews)\n M2(Python functions, classes, and modules)\n M3(Object oriented design in Python)\n M4(Testing and auto-formatting)\n M5(Dependencies and GitHub actions)\n M6(Documentation)\n M7(Distributing your package)\n\n B1(The bigger picture)\n B2(Separations of concern)\n B3(Abstraction and encapsulation)\n B4(Designing for high performance)\n B5(Testing your software)\n B6(Separations of concerns in practice)\n B7(Extensibility and flexibility)\n B8(The rules and exceptions of inheritance)\n\n M1 --> M2 --> M3 --> M4 --> M5 --> M6 --> M7\n\n B1 --> M2\n B2 --> M2\n B4 --> M4" }, { - "objectID": "04_testing.html#unit-testing", - "href": "04_testing.html#unit-testing", - "title": "Testing, linting and formatting", - "section": "Unit testing", - "text": "Unit testing\n\n\n\n\n\n\nDefinition “Unit”\n\n\n\nA small, fundamental piece of code.\nExecuted in isolation with appropriate inputs.\n\n\n\n\n\n\nA function is typically considered a “unit”\nLines of code within functions are smaller (can’t be isolated)\nClasses are considered bigger (but can be treated as units)" + "objectID": "05_dependencies_ci.html#section", + "href": "05_dependencies_ci.html#section", + "title": "Dependencies and Continuous Integration", + "section": "", + "text": "Application\nA program that is run by a user\n\ncommand line tool\nscript\nweb application\n\nPin versions to ensure reproducibility, e.g. 
numpy==1.11.0\n\nLibrary\nA program that is used by another program\n\nPython package\nLow level library (C, Fortran, Rust, …)\n\nMake the requirements as loose as possible, e.g. numpy>=1.11.0\n\n\n\nMake the requirements loose, to avoid conflicts with other packages." }, { - "objectID": "04_testing.html#a-good-unit-test", - "href": "04_testing.html#a-good-unit-test", - "title": "Testing, linting and formatting", - "section": "A good unit test", - "text": "A good unit test\n\n\n\n\nFully automated (next week)\nHas full control over all the pieces running (“fake” external dependencies)\nCan be run in any order\nRuns in memory (no DB or file access, for example)\nConsistently returns the same result (no random numbers)\nRuns fast\nTests a single logical concept in the system\nReadable\nMaintainable\nTrustworthy" + "objectID": "05_dependencies_ci.html#dependency-management", + "href": "05_dependencies_ci.html#dependency-management", + "title": "Dependencies and Continuous Integration", + "section": "Dependency management", + "text": "Dependency management\nExample of pinning versions:\n\n\nrequirements.txt\n\nnumpy==1.11.0\nscipy==0.17.0\nmatplotlib==1.5.1\n\n\nOr using a range of versions:\n\n\nrequirements.txt\n\nnumpy>=1.11.0\nscipy>=0.17.0\nmatplotlib>=1.5.1,<=2.0.0\n\n\n\nInstall dependencies:\n$ pip install -r requirements.txt\n\nA common way to declare dependencies is to use a requirements.txt file." 
}, { - "objectID": "04_testing.html#example", - "href": "04_testing.html#example", - "title": "Testing, linting and formatting", - "section": "Example", - "text": "Example\n\nget a timeseries of water levels\nfind the maxiumum water level each year\ncreate a summary report for the subset of data" + "objectID": "05_dependencies_ci.html#creating-an-installable-package", + "href": "05_dependencies_ci.html#creating-an-installable-package", + "title": "Dependencies and Continuous Integration", + "section": "Creating an installable package", + "text": "Creating an installable package" }, { - "objectID": "04_testing.html#integration-testing", - "href": "04_testing.html#integration-testing", - "title": "Testing, linting and formatting", - "section": "Integration testing", - "text": "Integration testing\ndef test_integration():\n wl = get_water_level(time=\"2019-01-01\", location=\"Aarhus\")\n max_wls = get_max_water_level(wl, freq=\"Y\")\n report = summary_report(max_wls)\n\n assert report.title == \"Summary report\"\n assert report.text == \"The maximum water level in 2021 was 3.0 m\"" + "objectID": "05_dependencies_ci.html#setup.py-vs-pyproject.toml", + "href": "05_dependencies_ci.html#setup.py-vs-pyproject.toml", + "title": "Dependencies and Continuous Integration", + "section": "setup.py vs pyproject.toml", + "text": "setup.py vs pyproject.toml\nsetup.py\n\nTraditional approach to defining package configuration and dependencies.\nDefines metadata, dependencies, and entry points in a Python script.\nUses setuptools to generate packages and install the package.\n\n\npyproject.toml\n\nModern approach to defining package configuration and dependencies.\nDefines metadata, dependencies, build tools, and packaging config in a TOML file.\nUses poetry or hatchling to generate packages and install the package." 
}, { - "objectID": "04_testing.html#testing-in-vs-code", - "href": "04_testing.html#testing-in-vs-code", - "title": "Testing, linting and formatting", - "section": "Testing in VS Code", - "text": "Testing in VS Code" + "objectID": "05_dependencies_ci.html#install-with-optional-dependencies", + "href": "05_dependencies_ci.html#install-with-optional-dependencies", + "title": "Dependencies and Continuous Integration", + "section": "Install with optional dependencies", + "text": "Install with optional dependencies\n\n\npyproject.toml\n\n[project.optional-dependencies]\ndev = [\"pytest\",\n \"black==22.3.0\",\n \"sphinx\",\n \"sphinx-rtd-theme\",\n \"myst-parser\",\n ]\n\ntest = [\"pytest\", \"pytest-cov\"]\n\n \n$ pip install mini[test]" }, { - "objectID": "04_testing.html#fixtures", - "href": "04_testing.html#fixtures", - "title": "Testing, linting and formatting", - "section": "Fixtures", - "text": "Fixtures\n\n\nA piece of code that is used by multiple tests\nProvide data or services to tests\nDefined with @pytest.fixture\nSet up test environment\nPass fixtures as test arguments" + "objectID": "05_dependencies_ci.html#creating-an-installable-package-1", + "href": "05_dependencies_ci.html#creating-an-installable-package-1", + "title": "Dependencies and Continuous Integration", + "section": "Creating an installable package", + "text": "Creating an installable package\nInstall package in editable mode:\n$ pip install -e .\n\nStart a Python session:\n>>> import mini\n>>> mini.foo()\n42\n\n\nRun tests:\n$ pytest\n...\n\ntests/test_foo.py . 
[100%]\n\n=============== 1 passed in 0.01s ===============" }, { - "objectID": "04_testing.html#fixture-example", - "href": "04_testing.html#fixture-example", - "title": "Testing, linting and formatting", - "section": "Fixture example", - "text": "Fixture example\n@pytest.fixture\ndef water_level():\n return TimeSeries([1.0, .., 3.0], start = \"2019-01-01\")\n\ndef test_get_max_water_level(water_level):\n max_wls = get_max_water_level(water_level, freq=\"Y\")\n \n assert len(max_wls) == 1\n assert max_wls[0] == 3.0" + "objectID": "05_dependencies_ci.html#virtual-environments", + "href": "05_dependencies_ci.html#virtual-environments", + "title": "Dependencies and Continuous Integration", + "section": "Virtual environments", + "text": "Virtual environments\n\n\nCreates a clean environment for each project\nAllows different versions of a package to coexist on your machine\nCan be used to create a reproducible environment for a project\nTo achieve complete isolation, use Docker containers (not covered in this course)" }, { - "objectID": "04_testing.html#test-coverage", - "href": "04_testing.html#test-coverage", - "title": "Testing, linting and formatting", - "section": "Test coverage", - "text": "Test coverage\n\n\nA measure of how much of your code is tested\nA good test suite should cover all the code\nInstall pytest-cov\nRun tests with coverage report\n\npytest --cov=myproj\n\nUse coverage report to identify untested code" + "objectID": "05_dependencies_ci.html#virtual-environments-1", + "href": "05_dependencies_ci.html#virtual-environments-1", + "title": "Dependencies and Continuous Integration", + "section": "Virtual environments", + "text": "Virtual environments\n$ which python\n/usr/bin/python\n$ python -m venv venv\n$ source venv/bin/activate # for 🐧 or venv\\Scripts\\activate.bat 🪟\n(venv)$ which python\n/home/user/src/myproj/venv/bin/python\n(venv)$ pip install -r requirements.txt\n\n\nBack in the days, when disk space was limited, it was a good idea to have 
a separate environment for each project.\nToday, disk space is cheap, and it is a good idea to have a separate environment for each project." }, { - "objectID": "04_testing.html#test-coverage-report", - "href": "04_testing.html#test-coverage-report", - "title": "Testing, linting and formatting", - "section": "Test coverage report", - "text": "Test coverage report\npytest --cov=myproj tests/\n-------------------- coverage: ... ---------------------\nName Stmts Miss Cover\n----------------------------------------\nmyproj/__init__ 2 0 100%\nmyproj/myproj 257 13 94%\nmyproj/feature4286 94 7 92%\n----------------------------------------\nTOTAL 353 20 94%" + "objectID": "05_dependencies_ci.html#condamamba-environments", + "href": "05_dependencies_ci.html#condamamba-environments", + "title": "Dependencies and Continuous Integration", + "section": "Conda/mamba environments", + "text": "Conda/mamba environments\nConda/mamba is a package manager that can be used to create virtual environments.\n$ where python\nC:\\Users\\JAN\\AppData\\Local\\miniforge3\\python.exe\n$ conda create -n myproj -f requirements.txt\n$ conda activate myproj\n(myproj)$ where python\nC:\\Users\\JAN\\AppData\\Local\\miniforge3\\envs\\myproj\\python.exe" }, { - "objectID": "04_testing.html#testing-advice", - "href": "04_testing.html#testing-advice", - "title": "Testing, linting and formatting", - "section": "Testing advice", - "text": "Testing advice\n\n\n\n\n\n\nTest edge cases\n\n\n\nempty lists\nlists with a single element\nempty strings\nempty dictionaries\nNone\nnp.nan" + "objectID": "05_dependencies_ci.html#branch-naming-convention", + "href": "05_dependencies_ci.html#branch-naming-convention", + "title": "Dependencies and Continuous Integration", + "section": "Branch naming convention", + "text": "Branch naming convention\nThere doesn’t exist a clear naming conventions.\n\n\nmain is the main branch🙄, this was previously named master.\nFeature branches uses lowercase separated with dashes, e.g. 
interpolation-options\nBranches related to fixing issues, start with the id of the issue, e.g. 42-fix-missing-value-handling\nFind a naming convention that works for you and your team." }, { - "objectID": "04_testing.html#tests-act-as-specification", - "href": "04_testing.html#tests-act-as-specification", - "title": "Testing, linting and formatting", - "section": "Tests act as specification", - "text": "Tests act as specification\ndef test_operable_period_can_be_missing():\n\n assert is_operable(height=1.0, period=None)\n assert is_operable(height=1.0, period=np.nan)\n assert is_operable(height=1.0)\n assert not is_operable(height=11.0)\n\ndef test_height_can_not_be_missing():\n\n with pytest.raises(ValueError) as excinfo:\n is_operable(height=None)\n is_operable(height=np.nan)\n \n assert \"height\" in str(excinfo.value)" + "objectID": "05_dependencies_ci.html#time-for-a-discussion", + "href": "05_dependencies_ci.html#time-for-a-discussion", + "title": "Dependencies and Continuous Integration", + "section": "Time for a discussion", + "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\nChapter 6: Separations of concerns in practice\nDiscuss what belongs in each layer of the application.\n\nPersistence layer\nBusiness logic layer\nPresentation layer\n\nShould your library have these three layers? If not, which layer is the focus of your library?" }, { - "objectID": "04_testing.html#test-driven-development", - "href": "04_testing.html#test-driven-development", - "title": "Testing, linting and formatting", - "section": "Test driven development", - "text": "Test driven development\n\n\nWrite a test that fails ❌\nWrite the code to make the test pass ✅\nRefactor the code ⚒️\n\n\n\nThe benefit of this approach is that you are forced to think about the expected behaviour of your code before you write it.\nIt is also too easy to write a test that passes without actually testing the code." 
+ "objectID": "05_dependencies_ci.html#continuous-integration", + "href": "05_dependencies_ci.html#continuous-integration", + "title": "Dependencies and Continuous Integration", + "section": "Continuous Integration", + "text": "Continuous Integration\nRunning tests on every commit in a well defined environment ensures that the code is working as expected.\nIt solves the “it works on my machine” problem.\nExecuting code on a remote server is a good way to ensure that the code is working as expected.\nTwo main approaches are:\n\nGitHub Actions\nAzure Pipelines\n\n\nGitHub actions was forked from Azure Pipelines and runs on the same type of infrastructure, thus are very similar technologies." }, { - "objectID": "04_testing.html#section", - "href": "04_testing.html#section", - "title": "Testing, linting and formatting", - "section": "", - "text": "and now for something completely different…" + "objectID": "05_dependencies_ci.html#github-actions", + "href": "05_dependencies_ci.html#github-actions", + "title": "Dependencies and Continuous Integration", + "section": "GitHub Actions", + "text": "GitHub Actions\n\n\n\nWorkflow are stored in the .github/workflows folder.\nWorkflow is described in a YAML file.\nYAML is whitespace sensitive (like Python).\nYAML can contain lists, dictionaries and strings, and can be nested.\n\n\n$ tree mikeio/.github/\nmikeio/.github/\n└── workflows\n ├── docs.yml\n ├── downstream_test.yml\n ├── full_test.yml\n ├── notebooks_test.yml\n ├── perf_test.yml\n ├── python-publish.yml\n └── quick_test.yml" }, { - "objectID": "04_testing.html#the-zen-of-python", - "href": "04_testing.html#the-zen-of-python", - "title": "Testing, linting and formatting", - "section": "The Zen of Python", - "text": "The Zen of Python\nBeautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\n\n…\nErrors should 
never pass silently.\nUnless explicitly silenced.\n…" + "objectID": "05_dependencies_ci.html#benefits-of-ci", + "href": "05_dependencies_ci.html#benefits-of-ci", + "title": "Dependencies and Continuous Integration", + "section": "Benefits of CI", + "text": "Benefits of CI\n\n\nRun tests on every commit\nTest on different operating systems\nTest on different Python versions\nCreate API documentation (next week)\nPublish package to PyPI or similar package repository (two weeks from now)" }, { - "objectID": "04_testing.html#exceptions", - "href": "04_testing.html#exceptions", - "title": "Testing, linting and formatting", - "section": "Exceptions", - "text": "Exceptions\n\n\nExceptions are a way to handle errors in your code.\nRaising an exception can prevent propagating bad values.\nExceptions are communication between the programmer and the user.\nThere are many built-in exceptions in Python\n\nIndexError\nKeyError\nValueError\nFileNotFoundError\n\nYou can also create your own custom exceptions, e.g. ModelInitialistionError, MissingLicenseError?" 
+ "objectID": "05_dependencies_ci.html#triggers", + "href": "05_dependencies_ci.html#triggers", + "title": "Dependencies and Continuous Integration", + "section": "Triggers", + "text": "Triggers\n\npush and pull_request are the most common triggers\nschedule can be used to run the workflow on a schedule\nworkflow_dispatch can be used to trigger the workflow manually\n\non:\n push:\n branches: [ main ]\n pull_request:\n branches: [ main ]\n schedule:\n - cron: '0 0 * * 0'\n workflow_dispatch:" }, { - "objectID": "04_testing.html#example-1", - "href": "04_testing.html#example-1", - "title": "Testing, linting and formatting", - "section": "Example", - "text": "Example\n\n\nsrc/ops.py\n\ndef is_operable(height:float, period:float) -> bool:\n if height < 0.0:\n raise ValueError(f\"Supplied value of {height=} is unphysical.\")\n\n>>> is_operable(height=-1.0, period=4.0)\n\nTraceback (most recent call last):\n ...\nValueError: Supplied value of height=-1.0 is unphysical.\n\n\nIt is better to raise an exception (that can terminate the program), than to propagate a bad value." + "objectID": "05_dependencies_ci.html#jobs", + "href": "05_dependencies_ci.html#jobs", + "title": "Dependencies and Continuous Integration", + "section": "Jobs", + "text": "Jobs\n\nOperating system\nPython version\n…\n\n{.yaml\"} ... jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, windows-latest] python-version: [3.8, 3.9, \"3.10\",\"3.11\"] ..." 
}, { - "objectID": "04_testing.html#warnings", - "href": "04_testing.html#warnings", - "title": "Testing, linting and formatting", - "section": "Warnings", - "text": "Warnings\nWarnings are a way to alert users of your code to potential issues or usage errors without actually halting the program’s execution.\n\n\nsrc/ops.py\n\nimport warnings\nwarnings.warn(\"This is a warning\")" + "objectID": "05_dependencies_ci.html#github-releases", + "href": "05_dependencies_ci.html#github-releases", + "title": "Dependencies and Continuous Integration", + "section": "GitHub Releases", + "text": "GitHub Releases\n\n\n\nGitHub releases are a way to publish software releases.\nYou can upload files, write release notes and tag the release.\nAs a minimum, the release will contain the source code at the time of the release.\nCreating a release can trigger other workflows, e.g. publishing a package to PyPI.\n\n\n\n\n\nhttps://github.com/pydata/xarray/releases/tag/v2022.12.0\n\n\n\nPython package development" }, { - "objectID": "04_testing.html#how-to-test-exceptions", - "href": "04_testing.html#how-to-test-exceptions", - "title": "Testing, linting and formatting", - "section": "How to test exceptions", - "text": "How to test exceptions\n\n\ntests/test_ops.py\n\nimport pytest\nfrom ops import is_operable\n\ndef test_negative_heights_are_not_valid():\n with pytest.raises(ValueError):\n is_operable(height=-1.0, period=4.0)\n\nThe same can be done with warnings." 
+ "objectID": "03_oop.html#object-oriented-design", + "href": "03_oop.html#object-oriented-design", + "title": "Object oriented design in Python", + "section": "Object oriented design", + "text": "Object oriented design\nBenefits of object oriented design:\n\nEncapsulation\nCode reuse (composition, inheritance)\nAbstraction" }, { - "objectID": "04_testing.html#linting", - "href": "04_testing.html#linting", - "title": "Testing, linting and formatting", - "section": "Linting", - "text": "Linting\nA way to check your code for common errors and style issues.\nruff is a new tool for linting Python code.\n\nsyntax errors\nunused imports\nunused variables\nundefined names\ncode style (e.g. line length, indentation, whitespace, etc.)" + "objectID": "03_oop.html#encapsulation", + "href": "03_oop.html#encapsulation", + "title": "Object oriented design in Python", + "section": "Encapsulation", + "text": "Encapsulation\nclass Location:\n def __init__(self, name, longitude, latitude):\n self.name = name.upper() # Names are always uppercase\n self.longitude = longitude\n self.latitude = latitude\n\n>>> loc = Location(\"Antwerp\", 4.42, 51.22)\n>>> loc.name\n'ANTWERP'\n>>> loc.name = \"Antwerpen\"\n>>> loc.name\n\"Antwerpen\" 😟" }, { - "objectID": "04_testing.html#linting-with-ruff", - "href": "04_testing.html#linting-with-ruff", - "title": "Testing, linting and formatting", - "section": "Linting with ruff", - "text": "Linting with ruff\n\n\nexamples/04_testing/process.py\n\nimport requests\nimport scipy\n\ndef preprocess(x, y, xout):\n\n x = x[~np.isnan(x)] \n method = \"cubic\"\n # interpolate missing values with cubic spline\n return scipy.interpolate.interp1d(x, y)(xout)\n\nRun ruff:\n$ ruff process.py\nprocess.py:1:8: F401 [*] `requests` imported but unused\nprocess.py:6:12: F821 Undefined name `np`\nprocess.py:7:5: F841 [*] Local variable `method` is assigned to but never used\nFound 3 errors.\n[*] 2 potentially fixable with the --fix option.\n\n\nLinting is a fast way to 
find common errors.\nUnused imports are confusing.\nUnused and undefined variables are usually a typo or a mistake. Fixing them can prevent bugs." + "objectID": "03_oop.html#encapsulation---attributes", + "href": "03_oop.html#encapsulation---attributes", + "title": "Object oriented design in Python", + "section": "Encapsulation - Attributes", + "text": "Encapsulation - Attributes\nVariables prefixed with an underscore (self._name) is a convention to indicate that the instance variable is private.\nclass Location:\n def __init__(self, name, longitude, latitude):\n self._name = name.upper() # Names are always uppercase\n ...\n\n @property\n def name(self):\n return self._name\n\n @name.setter\n def name(self, value):\n self._name = value.upper()\n\n>>> loc = Location(\"Antwerp\", 4.42, 51.22)\n>>> loc.name = \"Antwerpen\"\n>>> loc.name\n\"ANTWERPEN\" 😊" }, { - "objectID": "04_testing.html#formatting", - "href": "04_testing.html#formatting", - "title": "Testing, linting and formatting", - "section": "Formatting", - "text": "Formatting\n\n\nFormatting code for readability and maintainability is essential.\nblack is an opinionated automatic code formatter for Python.\nIt enforces its own rules for formatting, which are not configurable.\nHaving a unified style makes code changes easier to understand and collaborate on." 
+ "objectID": "03_oop.html#composition", + "href": "03_oop.html#composition", + "title": "Object oriented design in Python", + "section": "Composition", + "text": "Composition\n\n\nComposition in object oriented design is a way to combine objects or data types into more complex objects.\n\n\n\n\n\nclassDiagram\n\n class Grid{\n + nx\n + dx\n + ny\n + dy\n + find_index()\n }\n\n class ItemInfo{\n + name\n + type\n + unit\n }\n\n class DataArray{\n + data\n + time\n + item\n + geometry\n + plot()\n }\n\n DataArray --* Grid\n DataArray --* ItemInfo" }, { - "objectID": "04_testing.html#running-black", - "href": "04_testing.html#running-black", - "title": "Testing, linting and formatting", - "section": "Running Black", - "text": "Running Black\n$ black .\nreformatted data_utils.py\nreformatted dfsu/__init__.py\nreformatted dataarray.py\nreformatted dataset.py\nreformatted spatial/geometry.py\nreformatted pfs/pfssection.py\n\nAll done! ✨ 🍰 ✨\n6 files reformatted, 27 files left unchanged." + "objectID": "03_oop.html#composition---example", + "href": "03_oop.html#composition---example", + "title": "Object oriented design in Python", + "section": "Composition - Example", + "text": "Composition - Example\nclass Grid:\n def __init__(self, nx, dx, ny, dy):\n self.nx = nx\n self.dx = dx\n self.ny = ny\n self.dy = dy\n \n def find_index(self, x,y):\n ...\n\nclass DataArray:\n def __init__(self, data, time, item, geometry):\n self.data = data\n self.time = time\n self.item = item\n self.geometry = geometry\n\n def plot(self):\n ..." }, { - "objectID": "04_testing.html#running-black-1", - "href": "04_testing.html#running-black-1", - "title": "Testing, linting and formatting", - "section": "Running Black", - "text": "Running Black\nVisual Studio Code can be configured to run black automatically when saving a file using the Black extension." 
+ "objectID": "03_oop.html#inheritance", + "href": "03_oop.html#inheritance", + "title": "Object oriented design in Python", + "section": "Inheritance", + "text": "Inheritance" }, { - "objectID": "04_testing.html#profiling", - "href": "04_testing.html#profiling", - "title": "Testing, linting and formatting", - "section": "Profiling", - "text": "Profiling\n\n\nProfiling is a way to measure the performance of your code.\nIt can help you identify bottlenecks in your code.\nYour intuition about what is slow is often wrong.\nThe line_profiler package reports the time spent on each line of code.\nIt can be run inside a notebook using the lprun magic command." + "objectID": "03_oop.html#inheritance---example", + "href": "03_oop.html#inheritance---example", + "title": "Object oriented design in Python", + "section": "Inheritance - Example", + "text": "Inheritance - Example\n\n\n\n\n\nclassDiagram\n\nclass _GeometryFM{\n+ node_coordinates\n+ element_table\n}\n\nclass GeometryFM2D{\n+ interp2d()\n+ get_element_area()\n+ plot()\n}\n\nclass _GeometryFMLayered{\n- _n_layers\n- _n_sigma\n+ to_2d_geometry()\n}\n\nclass GeometryFM3D{\n+ plot()\n}\n\nclass GeometryFMVerticalProfile{\n+ plot()\n}\n _GeometryFM <|-- GeometryFM2D\n _GeometryFM <|-- _GeometryFMLayered\n _GeometryFMLayered <|-- GeometryFM3D\n _GeometryFMLayered <|-- GeometryFMVerticalProfile" }, { - "objectID": "04_testing.html#profiling---example-code", - "href": "04_testing.html#profiling---example-code", - "title": "Testing, linting and formatting", - "section": "Profiling - example code", - "text": "Profiling - example code\nimport numpy as np\n\ndef top_neighbors(points, radius=\"0.1\"):\n \"\"\"Don't use this function, it's only purpose is to be profiled.\"\"\"\n n = len(points)\n idx = np.array([int(x) for x in str.split(\"0 \"* n)])\n\n for i in range(n):\n for j in range(n):\n if i != j:\n d = np.sqrt(np.sum((points[i] - points[j])**2))\n if d < float(radius): \n idx[i] += 1\n for i in range(n):\n for j in 
range(n - i - 1):\n if idx[j] < idx[j + 1]:\n idx[j], idx[j + 1] = idx[j + 1], idx[j]\n points[j], points[j + 1] = points[j + 1], points[j]\n return points\n\ndef main():\n points = np.random.rand(1000, 2)\n top = top_neighbors(points)" + "objectID": "03_oop.html#inheritance---example-2", + "href": "03_oop.html#inheritance---example-2", + "title": "Object oriented design in Python", + "section": "Inheritance - Example (2)", + "text": "Inheritance - Example (2)\nclass _GeometryFMLayered(_GeometryFM):\n def __init__(self, nodes, elements, n_layers, n_sigma):\n # call the parent class init method\n super().__init__(\n nodes=nodes,\n elements=elements,\n )\n self._n_layers = n_layers\n self._n_sigma = n_sigma" }, { - "objectID": "04_testing.html#profiling---output", - "href": "04_testing.html#profiling---output", - "title": "Testing, linting and formatting", - "section": "Profiling - output", - "text": "Profiling - output\nInvoking the jupyter magic command lprun with:\n\nfunction to profile - top_neighbors\ncode to run - main()\n\n%lprun -f top_neighbors main()\n\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 3 def top_neighbors(points, radius=\"0.1\"):\n 4 \"\"\"Don't use this function, it's only purpose is to be profiled.\"\"\"\n 5 1 2800.0 2800.0 0.0 n = len(points)\n 6 1 353300.0 353300.0 0.0 idx = np.array([int(x) for x in str.split(\"0 \"* n)])\n 7 \n 8 1001 345100.0 344.8 0.0 for i in range(n):\n 9 1001000 378191701.0 377.8 2.2 for j in range(n):\n 10 1000000 328387205.0 328.4 1.9 if i != j:\n 11 999000 1e+10 14473.0 83.8 d = np.sqrt(np.sum((points[i] - points[j])**2))\n 12 999000 933778605.0 934.7 5.4 if d < float(radius): \n 13 28952 57010001.0 1969.1 0.3 idx[i] += 1\n 14 1001 367100.0 366.7 0.0 for i in range(n):\n 15 500500 144295203.0 288.3 0.8 for j in range(n - i - 1):\n 16 499500 302166901.0 604.9 1.8 if idx[j] < idx[j + 1]:\n 17 240227 212070500.0 882.8 1.2 idx[j], idx[j + 1] = idx[j + 
1], idx[j]\n 18 240227 437538803.0 1821.4 2.5 points[j], points[j + 1] = points[j + 1], points[j]\n 19 1 500.0 500.0 0.0 return points\n\n\n\nPython package development" + "objectID": "03_oop.html#composition-vs-inheritance", + "href": "03_oop.html#composition-vs-inheritance", + "title": "Object oriented design in Python", + "section": "Composition vs inheritance", + "text": "Composition vs inheritance\n\n\nInheritance is often used to reuse code, but this is not the main purpose of inheritance.\nInheritance is used to specialize behavior.\nIn most cases, composition is a better choice than inheritance.\nSome recent programming languages (e.g. Go & Rust) do not support this style of inheritance.\nUse inheritance only when it makes sense.\n\n\n\n\nHillard, 2020, Ch. 8 “The rules (and exceptions) of inheritance”" }, { - "objectID": "01_version_control.html#why-use-version-control", - "href": "01_version_control.html#why-use-version-control", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Why use version control?", - "text": "Why use version control?\n\n\n\n\n\nManage changes to code over time\nKeep track of changes and revert to previous versions if needed.\nCollaborate and merge changes from different people\nEnsure code stability\nBest practice for software development" + "objectID": "03_oop.html#types", + "href": "03_oop.html#types", + "title": "Object oriented design in Python", + "section": "Types", + "text": "Types\nC#\nint n = 2;\nString s = \"Hello\";\n\npublic String RepeatedString(String s, int n) {\n return Enumerable.Repeat(s, n).Aggregate((a, b) => a + b);\n}\n\nPython\nn = 2\ns = \"Hello\"\n\ndef repeated_string(s, n):\n return s * n" }, { - "objectID": "01_version_control.html#centralized-version-control", - "href": "01_version_control.html#centralized-version-control", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Centralized version control", - "text": "Centralized version control\n\nSingle source 
with the entire history\nLocal copy with latest version . . .\nExamples: SVN, Surround" + "objectID": "03_oop.html#types-1", + "href": "03_oop.html#types-1", + "title": "Object oriented design in Python", + "section": "Types", + "text": "Types\n\n\nPython is a dynamically typed language\nTypes are not checked at compile time\nTypes are checked at runtime\n\n\n\nPython with type hints\nn: int = 2\ns: str = \"Hello\"\n\ndef repeated_string(s:str, n:int) -> str:\n return s * n" }, { - "objectID": "01_version_control.html#distributed-version-control", - "href": "01_version_control.html#distributed-version-control", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Distributed version control", - "text": "Distributed version control\n\nLocal copy has the entire history\nCommit changes to code offline\nAuthorative source (origin) . . .\nExamples: Git, Mercurial" + "objectID": "03_oop.html#abstraction", + "href": "03_oop.html#abstraction", + "title": "Object oriented design in Python", + "section": "Abstraction", + "text": "Abstraction\n\n\nVersion A\ntotal = 0.0\nfor x in values:\n total = total +x\n\nVersion B\ntotal = sum(values)\n\n\n\n\n\nUsing functions, e.g. sum() allows us to operate on a higher level of abstraction.\nToo little abstraction will force you to write many lines of boiler-plate code\nToo much abstraction limits the flexibility\n✨Find the right level of abstraction!✨\n\n\n\n\nWhich version is easiest to understand?\nWhich version is easiest to change?" 
}, { - "objectID": "01_version_control.html#git", - "href": "01_version_control.html#git", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Git", - "text": "Git\nGit is a powerful tool for managing code changes and collaborating with others on a project.\n\nYou can use Git from the command line, or with a graphical user interface (GUI).\n\n\n> git add foo.py\n\n\n> git commit -m \"Nailed it\"\n\n\n> git push" + "objectID": "03_oop.html#collections-abstract-base-classes", + "href": "03_oop.html#collections-abstract-base-classes", + "title": "Object oriented design in Python", + "section": "Collections Abstract Base Classes", + "text": "Collections Abstract Base Classes\n\n\n\n\nclassDiagram\n Container <|-- Collection\n Sized <|-- Collection\n Iterable <|-- Collection\n \n class Container{\n __contains__(self, x)\n }\n\n class Sized{\n __len__(self)\n }\n\n class Iterable{\n __iter__(self)\n }\n\n\n\n\n\n\n\n\nIf a class implements __len__ it is a Sized object.\nIf a class implements __contains__ it is a Container object.\nIf a class implements __iter__ it is an Iterable object." 
}, { - "objectID": "01_version_control.html#basic-git-commands", - "href": "01_version_control.html#basic-git-commands", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Basic Git commands", - "text": "Basic Git commands\n\n\ngit add: adds a file to the staging area\ngit commit: creates a new commit with the changes in the staging area\ngit status: shows the current status of your repository\ngit log: shows the commit history of your repository\ngit stash: temporarily save changes that are not ready to be committed" + "objectID": "03_oop.html#collections-abstract-base-classes-1", + "href": "03_oop.html#collections-abstract-base-classes-1", + "title": "Object oriented design in Python", + "section": "Collections Abstract Base Classes", + "text": "Collections Abstract Base Classes\n>>> a = [1, 2, 3]\n>>> 1 in a\nTrue\n>>> a.__contains__(1)\nTrue\n>>> len(a)\n3\n>>> a.__len__()\n3\n>>> for x in a:\n... v.append(x)\n>>> it = a.__iter__()\n>>> next(it)\n1\n>>> next(it)\n2\n>>> next(it)\n3\n>>> next(it)\nTraceback (most recent call last):\n File \"<stdin>\", line 1, in <module>\nStopIteration" }, { - "objectID": "01_version_control.html#working-with-remote-repositories", - "href": "01_version_control.html#working-with-remote-repositories", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Working with remote repositories", - "text": "Working with remote repositories\n\n\ngit clone: creates a copy of the codebase on your local machine.\ngit push: pushes changes back to the remote repository.\ngit pull: pulls changes from the remote repository." 
+ "objectID": "03_oop.html#collections-abstract-base-classes-2", + "href": "03_oop.html#collections-abstract-base-classes-2", + "title": "Object oriented design in Python", + "section": "Collections Abstract Base Classes", + "text": "Collections Abstract Base Classes\n\n\n\n\n\nclassDiagram\n Container <|-- Collection\n Sized <|-- Collection\n Iterable <|-- Collection\n Collection <|-- Sequence\n Collection <|-- Set\n Sequence <|-- MutableSequence\n Mapping <|-- MutableMapping\n Collection <|-- Mapping\n\n MutableSequence <|-- List\n Sequence <|-- Tuple\n MutableMapping <|-- Dict" }, { - "objectID": "01_version_control.html#branching-and-merging", - "href": "01_version_control.html#branching-and-merging", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Branching and Merging", - "text": "Branching and Merging\n\nA branch is a separate version of your code that you can work on independently from the main branch.\ngit merge: merges changes back into the main branch (we will do this from GitHub)" + "objectID": "03_oop.html#pythonic", + "href": "03_oop.html#pythonic", + "title": "Object oriented design in Python", + "section": "Pythonic", + "text": "Pythonic\nIf you want your code to be Pythonic, you have to be familiar with these types and their methods.\nDundermethods:\n\n__getitem__\n__setitem__\n__len__\n__contains__\n…" }, { - "objectID": "01_version_control.html#git-hosting-platforms", - "href": "01_version_control.html#git-hosting-platforms", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Git hosting platforms", - "text": "Git hosting platforms" + "objectID": "03_oop.html#duck-typing", + "href": "03_oop.html#duck-typing", + "title": "Object oriented design in Python", + "section": "Duck typing", + "text": "Duck typing\n\n\n“If it walks like a duck and quacks like a duck, it’s a duck”\nFrom the perspective of the caller, it doesn’t matter if it is a rubber duck or a real duck.\nThe type of the object is not 
important, as long as it has the right methods.\nPython is different than C# or Java, where you would have to create an interface IToolbox and implement it for Toolbox." }, { - "objectID": "01_version_control.html#github", - "href": "01_version_control.html#github", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "GitHub", - "text": "GitHub\n\n\nGit repository hosting service\nCollaborate with others on codebase\nFork a repository to work on your own version\nPull requests for code review and merging changes\nIssue tracking and project management tools\nGitHub Pages for hosting websites" + "objectID": "03_oop.html#duck-typing---example", + "href": "03_oop.html#duck-typing---example", + "title": "Object oriented design in Python", + "section": "Duck typing - Example", + "text": "Duck typing - Example\nAn example is the scikit-learn transformer interface\n\nfit\ntransform\nfit_transform\n\nIf you want to make a transformer compatible with sklearn, you have to implement these methods." 
}, { - "objectID": "01_version_control.html#github-flow", - "href": "01_version_control.html#github-flow", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Github flow", - "text": "Github flow\n\n\n\nCreate a branch\nMake changes\nCreate a pull request\nReview\nMerge\n\n\n\n\nClone a repository to work on a copy (optionally: fork first)\nCreate a branch for each new feature or fix\nCommit changes and push to remote repository\nOpen a pull request to propose changes and request code review\nMerge changes back into the main branch" + "objectID": "03_oop.html#duck-typing---example-1", + "href": "03_oop.html#duck-typing---example-1", + "title": "Object oriented design in Python", + "section": "Duck typing - Example", + "text": "Duck typing - Example\nclass PositiveNumberTransformer:\n\n def fit(self, X, y=None):\n # no need to fit (still need to have the method!)\n return self\n\n def transform(self, X):\n return np.abs(X)\n\n def fit_transform(self, X, y=None):\n return self.fit(X, y).transform(X)" }, { - "objectID": "01_version_control.html#time-for-a-discussion", - "href": "01_version_control.html#time-for-a-discussion", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Time for a discussion", - "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\n\nIntroduce your project briefly\nThink about a project you’ve worked on in the past that involved collaborating with others on code. What challenges did you face, and how do you think Git and GitHub could have helped to address those challenges?\n\nAfter break out session:\n\nOne person from each team briefly presents their discussion outcomes\n\n\n\nWhat is the benefit of working in branches?\nWhat are some best practices for collaborating on code with others, and how can Git and GitHub help to support those best practices?" 
+ "objectID": "03_oop.html#duck-typing---mixins", + "href": "03_oop.html#duck-typing---mixins", + "title": "Object oriented design in Python", + "section": "Duck typing - Mixins", + "text": "Duck typing - Mixins\nWe can inherit some behavior from sklearn.base.TransformerMixin\nfrom sklearn.base import TransformerMixin\n\nclass RemoveOutliersTransformer(TransformerMixin):\n\n def __init__(self, lower_bound, upper_bound):\n self.lower_bound = lower_bound\n self.upper_bound = upper_bound\n self.lower_ = None\n self.upper_ = None\n\n def fit(self, X, y=None):\n self.lower_ = np.quantile(X, self.lower_bound)\n self.upper_ = np.quantile(X, self.upper_bound)\n\n def transform(self, X):\n return np.clip(X, self.lower_, self.upper_)\n\n # def fit_transform(self, X, y=None):\n # we get this for free, from TransformerMixin" }, { - "objectID": "01_version_control.html#desktop-application-github-desktop", - "href": "01_version_control.html#desktop-application-github-desktop", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Desktop Application: GitHub Desktop", - "text": "Desktop Application: GitHub Desktop" + "objectID": "03_oop.html#lets-revisit-the-date-interval", + "href": "03_oop.html#lets-revisit-the-date-interval", + "title": "Object oriented design in Python", + "section": "Let’s revisit the (date) Interval", + "text": "Let’s revisit the (date) Interval\nThe Interval class represents an interval in time.\nclass Interval:\n def __init__(self, start, end):\n self.start = start\n self.end = end\n\n def __contains__(self, x):\n return self.start < x < self.end\n\n>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))\n\n>>> date(2020,1,15) in dr\nTrue\n>>> date(1970,1,1) in dr\nFalse\n\nWhat if we want to make another type of interval, e.g. an interval of numbers \\([1.0, 2.0]\\)?" 
}, { - "objectID": "01_version_control.html#demo", - "href": "01_version_control.html#demo", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Demo", - "text": "Demo" + "objectID": "03_oop.html#a-number-interval", + "href": "03_oop.html#a-number-interval", + "title": "Object oriented design in Python", + "section": "A number interval", + "text": "A number interval\nclass Interval:\n def __init__(self, start, end):\n self.start = start\n self.end = end\n\n def __contains__(self, x):\n return self.start < x < self.end\n \n>>> interval = Interval(5, 10)\n\n>>> 8 in interval\nTrue\n>>> 12 in interval\nFalse\n\nAs long as the start, end and x are comparable, the Interval class is a generic class able to handle integers, floats, dates, datetimes, strings …" }, { - "objectID": "01_version_control.html#github-best-practices", - "href": "01_version_control.html#github-best-practices", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Github best practices", - "text": "Github best practices\n\n\nCommit often\nUse descriptive commit messages\nKeep pull requests small and focused\nUse “issues” to track work\nReview code regularly" + "objectID": "03_oop.html#postels-law", + "href": "03_oop.html#postels-law", + "title": "Object oriented design in Python", + "section": "Postel’s law", + "text": "Postel’s law\na.k.a. 
the Robustness principle of software design\n\nBe liberal in what you accept\nBe conservative in what you send\n\n\ndef process(number: Union[int,str,float]) -> int:\n # make sure number is an int from now on\n number = int(number)\n\n result = number * 2\n return result" }, { - "objectID": "01_version_control.html#resources", - "href": "01_version_control.html#resources", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Resources", - "text": "Resources\n\nGitHub: quickstart\nRealPython: git and github intro\nDatacamp: introduction to Git" + "objectID": "03_oop.html#section", + "href": "03_oop.html#section", + "title": "Object oriented design in Python", + "section": "", + "text": "The consumers of your package (including your future self) will be grateful if you are not overly restrictive in what types you accept as input." }, { - "objectID": "01_version_control.html#word-list", - "href": "01_version_control.html#word-list", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Word list", - "text": "Word list\n\nClone\n\nmaking a local copy of a remote repository on your computer.\n\nRemote\n\na reference to a Git repository that is hosted on a remote server, typically on a service like GitHub.\n\nCommit\n\na record of changes made to a repository, including the changes themselves and a message describing what was changed.\n\nStage\n\nselecting changes that you want to include in the next commit.\n\nPush\n\nsending changes from your local repository to a remote repository.\n\nPull\n\nretrieving changes from a remote repository and merging them into your local repository.\n\nBranch\n\na separate line of development that can be used to work on new features or bug fixes without affecting the main codebase.\n\nPull request\n\na way to propose changes to a repository by asking the repository owner to “pull” in the changes from a branch or fork.\n\nStash\n\ntemporarily save changes that are not ready to be committed (bring them back later 
when needed).\n\nMerge\n\nthe process of combining changes from one branch or fork into another, typically the main codebase.\n\nRebase\n\na way to integrate changes from one branch into another by applying the changes from the first branch to the second branch as if they were made there all along.\n\nMerge conflict\n\nwhen Git is unable to automatically merge changes from two different branches, because the changes overlap or conflict.\n\nCheckout\n\nswitching between different branches or commits in a repository.\n\nFork\n\na copy of a repository that you create on your own account, which you can modify without affecting the original repository." + "objectID": "03_oop.html#refactoring", + "href": "03_oop.html#refactoring", + "title": "Object oriented design in Python", + "section": "Refactoring", + "text": "Refactoring\n\n\nRefactoring is a way to improve the design of existing code\nChanging a software system in such a way that it does not alter the external behavior of the code, yet improves its internal structure\nRefactoring is a way to make code more readable and maintainable\nHousekeeping" }, { - "objectID": "01_version_control.html#summary", - "href": "01_version_control.html#summary", - "title": "Git, GitHub, Pull Requests, and code reviews", - "section": "Summary", - "text": "Summary\n\n\nVersion control is a tool for managing changes to code\nGit is a distributed version control system (software)\nGitHub is a platform for hosting and collaborating on Git repositories\nGitHub Desktop is a GUI for Git (and GitHub)\nPull requests are a way to propose changes to a repository\n\n\n\n\n\nPython package development" + "objectID": "03_oop.html#common-refactoring-techniques", + "href": "03_oop.html#common-refactoring-techniques", + "title": "Object oriented design in Python", + "section": "Common refactoring techniques:", + "text": "Common refactoring techniques:\n\nExtract method\nExtract variable\nRename method\nRename variable\nRename class\nInline 
method\nInline variable\nInline class" }, { - "objectID": "07_packaging.html#packaging", - "href": "07_packaging.html#packaging", - "title": "Distributing your Python package", - "section": "Packaging", - "text": "Packaging\nPackaging means creating a package that can be installed by pip.\nThere are many ways to create an installable package, and many ways to distribute it.\nWe will show how to create a package using hatchling, and how to distribute it on GitHub, PyPI and a private PyPI server." + "objectID": "03_oop.html#rename-variable", + "href": "03_oop.html#rename-variable", + "title": "Object oriented design in Python", + "section": "Rename variable", + "text": "Rename variable\nBefore\nn = 0\nfor v in y:\n if v < 0:\n n = n + 1\n\nAfter\nFREEZING_POINT = 0.0\nn_freezing_days = 0\nfor temp in daily_max_temperatures:\n if temp < FREEZING_POINT:\n n_freezing_days = n_freezing_days + 1" }, { - "objectID": "07_packaging.html#benefits-of-packaging", - "href": "07_packaging.html#benefits-of-packaging", - "title": "Distributing your Python package", - "section": "Benefits of packaging", - "text": "Benefits of packaging\n\n\nDistribute your package to others\nInstall your package with pip\nSpecify dependencies\nReproducibility\nSpecify version\nRelease vs. 
development versions" + "objectID": "03_oop.html#extract-variable", + "href": "03_oop.html#extract-variable", + "title": "Object oriented design in Python", + "section": "Extract variable", + "text": "Extract variable\nBefore\ndef predict(x):\n return min(0.0, 0.5 + 2.0 * min(0,x) + (random.random() - 0.5) / 10.0)\n\nAfter\ndef predict(x):\n scale = 10.0\n error = (random.random() - 0.5) / scale)\n a = 0.5\n b = 2.0 \n draft = a + b * x + error\n return min(0.0, draft)" }, { - "objectID": "07_packaging.html#packaging-workflow", - "href": "07_packaging.html#packaging-workflow", - "title": "Distributing your Python package", - "section": "Packaging workflow", - "text": "Packaging workflow\n\nCreate a pyproject.toml in the root folder of the project\nBuild a package (e.g. myproject-0.1.0-py3-none-any.whl)\nUpload the package to location, where others can find it" + "objectID": "03_oop.html#extract-method", + "href": "03_oop.html#extract-method", + "title": "Object oriented design in Python", + "section": "Extract method", + "text": "Extract method\ndef error(scale):\n return (random.random() - 0.5) / scale)\n\ndef linear_model(x, *, a=0.0, b=1.0):\n return a + b * x\n\ndef clip(x, *, min_value=0.0):\n return min(min_value, x)\n\ndef predict(x): \n draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)\n return clip(draft, min_value=0.)" }, { - "objectID": "07_packaging.html#pyproject.toml", - "href": "07_packaging.html#pyproject.toml", - "title": "Distributing your Python package", - "section": "pyproject.toml", - "text": "pyproject.toml\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[project]\nname = \"my_library\"\nversion = \"0.0.1\"\ndependencies = [\n \"numpy\"\n]\n\nauthors = [\n { name=\"First Last\", email=\"initials@dhigroup.com\" },\n]\ndescription = \"Useful library\"\nreadme = \"README.md\"\nrequires-python = \">=3.7\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT 
License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\n[project.optional-dependencies]\ndev = [\"pytest\",\"flake8\",\"black\",\"sphinx\", \"myst-parser\",\"sphinx-book-theme\"]\ntest= [\"pytest\"]\n\n[project.urls]\n\"Homepage\" = \"https://github.com/DHI/my_library\"\n\"Bug Tracker\" = \"https://github.com/DHI/my_library/issues\"" + "objectID": "03_oop.html#inline-method", + "href": "03_oop.html#inline-method", + "title": "Object oriented design in Python", + "section": "Inline method", + "text": "Inline method\nOpposite of extract method.\ndef predict(x): \n draft = linear_model(x, a=0.5, b=2.0) + error(scale=10.0)\n return min(0.0, x)" }, { - "objectID": "07_packaging.html#versioning", - "href": "07_packaging.html#versioning", - "title": "Distributing your Python package", - "section": "Versioning", - "text": "Versioning\nVersioning your package is important for reproducibility and to avoid breaking changes.\n\n\n\nSemantic versioning use three numbers {major}.{minor}.{patch}, e.g. 1.1.0\n\n\nA new major version indicates breaking changes\nA new minor version indicates new features, without breaking changes\nA new patch version indicates a small change, e.g. a bug fix\nEach of the numbers can be higher than 9, e.g. 1.0.0 is more recent than 0.24.12" + "objectID": "03_oop.html#composed-method", + "href": "03_oop.html#composed-method", + "title": "Object oriented design in Python", + "section": "Composed method", + "text": "Composed method\nBreak up a long method into smaller methods." 
}, { - "objectID": "07_packaging.html#version-1.0", - "href": "07_packaging.html#version-1.0", - "title": "Distributing your Python package", - "section": "Version 1.0", - "text": "Version 1.0\n\n\nA version number of 1.0 indicates that the package is ready for production\nThe API is stable, and breaking changes will only be introduced in new major versions\nThe package is well tested, and the documentation is complete\nStart with version 0.1.0 and increase the version number as you add features" + "objectID": "03_oop.html#composed-method-1", + "href": "03_oop.html#composed-method-1", + "title": "Object oriented design in Python", + "section": "Composed method", + "text": "Composed method\n\nDivide your program into methods that perform one identifiable task\nKeep all of the operations in a method at the same level of abstraction.\nThis will naturally result in programs with many small methods, each a few lines long.\nWhen you use Extract method a bunch of times on a method the original method becomes a Composed method." }, { - "objectID": "07_packaging.html#breaking-changes", - "href": "07_packaging.html#breaking-changes", - "title": "Distributing your Python package", - "section": "Breaking changes", - "text": "Breaking changes\nWhat is a breaking change?\n\n\nRemoving a function\nChanging the name of a function\nChanging the signature of a function (arguments, types, return value)\n\n\n\nTry to avoid breaking changes, if possible, but if you do, increase the major version number!" 
+ "objectID": "02_function_classes.html#functions-as-black-boxes", + "href": "02_function_classes.html#functions-as-black-boxes", + "title": "Functions, classes and modules", + "section": "Functions as black boxes", + "text": "Functions as black boxes\n\n\n\n\nflowchart LR\n A(Input A) --> F[\"Black box\"]\n B(Input B) --> F\n F --> O(Output)\n\n style F fill:#000,color:#fff,stroke:#333,stroke-width:4px\n\n\n\n\n\n\n\nA function is a black box that takes some input and produces some output.\nThe input and output can be anything, including other functions.\nAs long as the input and output are the same, the function body can be modified." }, { - "objectID": "07_packaging.html#installing-specific-versions", - "href": "07_packaging.html#installing-specific-versions", - "title": "Distributing your Python package", - "section": "Installing specific versions", - "text": "Installing specific versions\n\npip install my_library will install the latest version\npip install my_library==1.0.0 will install version 1.0.0\npip install my_library>=1.0.0 will install version 1.0.0 or higher" + "objectID": "02_function_classes.html#pure-functions", + "href": "02_function_classes.html#pure-functions", + "title": "Functions, classes and modules", + "section": "Pure functions", + "text": "Pure functions\nA pure function returns the same output for the same input.\ndef f(x)\n return x**2\n\n>> f(2)\n4\n>> f(2)\n4" }, { - "objectID": "07_packaging.html#pre-release-versions", - "href": "07_packaging.html#pre-release-versions", - "title": "Distributing your Python package", - "section": "Pre-release versions", - "text": "Pre-release versions\n\n\n\nVersions that are not ready for production\nIndicated by a suffix, e.g. 
1.0.0rc1\nWill not be installed by default\nCan be installed with pip install my_library==1.0.0rc1\nListed on PyPI, but not on the search page" + "objectID": "02_function_classes.html#side-effects", + "href": "02_function_classes.html#side-effects", + "title": "Functions, classes and modules", + "section": "Side effects", + "text": "Side effects\nA function can have side effects, like appending to a file\ndef f_with_side_effect(x):\n with open(\"output.txt\", \"a\") as f:\n f.write(str(x))\n return x**2\n\nThe function has x as input, returns the square of x, but also appends x to a file. If you run the function a second time, the file will contain two lines." }, { - "objectID": "07_packaging.html#license", - "href": "07_packaging.html#license", - "title": "Distributing your Python package", - "section": "License", - "text": "License\n\n\nA license is a legal agreement between you and others who use your package\nIf you do not specify a license, others cannot use your package legally\nThe license is specified in the pyproject.toml file\nRead more about licenses on https://choosealicense.com/\nCheck if your package is compatible with the license of the dependencies" + "objectID": "02_function_classes.html#side-effects-1", + "href": "02_function_classes.html#side-effects-1", + "title": "Functions, classes and modules", + "section": "Side effects", + "text": "Side effects\nPure functions with no side effects are easier to reason about.\nBut sometimes side effects are necessary.\n\nWriting to a file\nWriting to a database\nPrinting to the screen\nCreating a plot" }, { - "objectID": "07_packaging.html#dependencies", - "href": "07_packaging.html#dependencies", - "title": "Distributing your Python package", - "section": "Dependencies", - "text": "Dependencies\n\n\nApplication\nA program that is run by a user\n\ncommand line tool\nscript\nweb application\n\nPin versions to ensure reproducibility, e.g. 
numpy==1.11.0\n\nLibrary\nA program that is used by another program\n\nPython package\nLow level library (C, Fortran, Rust, …)\n\nMake the requirements as loose as possible, e.g. numpy>=1.11.0\n\n\n\nMake the requirements loose, to avoid conflicts with other packages." + "objectID": "02_function_classes.html#modifying-input-arguments", + "href": "02_function_classes.html#modifying-input-arguments", + "title": "Functions, classes and modules", + "section": "Modifying input arguments", + "text": "Modifying input arguments\ndef difficult_function(values):\n for i in range(len(values)):\n values[i] = min(0, values[i]) # 😟\n return values\n\n>>> x = [1,2,-1]\n>>> difficult_function(x)\n>>> x\n[0,0,-1]\n\nThis function modifies the input array, which might come as a surprise. The array is passed by reference, so the function can modify it." }, { - "objectID": "07_packaging.html#pyproject.toml-1", - "href": "07_packaging.html#pyproject.toml-1", - "title": "Distributing your Python package", - "section": "pyproject.toml", - "text": "pyproject.toml\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[project]\nname = \"my_library\"\nversion = \"0.0.1\"\ndependencies = [\n \"numpy\"\n]\n\nauthors = [\n { name=\"First Last\", email=\"initials@dhigroup.com\" },\n]\ndescription = \"Useful library\"\nreadme = \"README.md\"\nrequires-python = \">=3.7\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\n[project.optional-dependencies]\ndev = [\"pytest\",\"flake8\",\"black\",\"sphinx\", \"myst-parser\",\"sphinx-book-theme\"]\ntest= [\"pytest\"]\n\n[project.urls]\n\"Homepage\" = \"https://github.com/DHI/my_library\"\n\"Bug Tracker\" = \"https://github.com/DHI/my_library/issues\"\n\n\nMandatory dependencies are specified in the dependencies section.\nOptional 
dependencies are specified in the optional-dependencies section." + "objectID": "02_function_classes.html#positional-arguments", + "href": "02_function_classes.html#positional-arguments", + "title": "Functions, classes and modules", + "section": "Positional arguments", + "text": "Positional arguments\ndef f(x, y):\n return x + y\n\n>>> f(1, 2)\n3" }, { - "objectID": "07_packaging.html#classifiers", - "href": "07_packaging.html#classifiers", - "title": "Distributing your Python package", - "section": "Classifiers", - "text": "Classifiers\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"License :: OSI Approved :: MIT License\",\n \"Development Status :: 2 - Pre-Alpha\",\n \"Operating System :: OS Independent\",\n \"Topic :: Scientific/Engineering\",\n]\n\nClassifiers are used to categorize your package\nLess relevant for internal packages\nOperating system (Windows, Linux, MacOS)\nDevelopment status (Alpha, Beta, Production/Stable)" + "objectID": "02_function_classes.html#keyword-arguments", + "href": "02_function_classes.html#keyword-arguments", + "title": "Functions, classes and modules", + "section": "Keyword arguments", + "text": "Keyword arguments\ndef f(x, y):\n return x + y\n\n>>> f(x=1, y=2)\n3" }, { - "objectID": "07_packaging.html#packaging-non-python-files", - "href": "07_packaging.html#packaging-non-python-files", - "title": "Distributing your Python package", - "section": "Packaging non-Python files", - "text": "Packaging non-Python files\n\nIncluding non-Python files can be useful for e.g. machine learning models.\nIf you use hatchling, you can include non-Python files in your package.\nhatchling uses .gitignore to determine which files to include." 
+ "objectID": "02_function_classes.html#positional-arguments-1", + "href": "02_function_classes.html#positional-arguments-1", + "title": "Functions, classes and modules", + "section": "Positional arguments", + "text": "Positional arguments\n\n\nVersion 1\ndef is_operable(height, period):\n\n return height < 2.0 and period < 6.0\n\n>>> is_operable(1.0, 3.0)\nTrue\n\nVersion 2\ndef is_operable(period, height=0.0):\n # dont forget, that arguments are swapped 👍\n return height < 2.0 and period < 6.0\n\n>>> is_operable(1.0, 3.0)\nFalse 😟\n\n\n\nThe order of the arguments is swapped, since we want to make height an optional argument (more on that later). This breaks existing code, since the order of the arguments is changed." }, { - "objectID": "07_packaging.html#github-secrets", - "href": "07_packaging.html#github-secrets", - "title": "Distributing your Python package", - "section": "GitHub secrets", - "text": "GitHub secrets\n\nStore sensitive information, e.g. passwords, in your repository.\nSecrets are encrypted, and only visible to you and GitHub Actions.\nAdd secrets in the repository settings.\n\nTo use secrets as environment variables in GitHub Actions, add them to the env section of the workflow:\nenv:\n TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}" + "objectID": "02_function_classes.html#keyword-only-arguments", + "href": "02_function_classes.html#keyword-only-arguments", + "title": "Functions, classes and modules", + "section": "Keyword only arguments", + "text": "Keyword only arguments\ndef f(*, x, y):\n return x + y\n\n>>> f(1,2)\nTraceback (most recent call last):\n File \"<stdin>\", line 1, in <module>\nTypeError: f() takes 0 positional arguments but 2 were given" }, { - "objectID": "07_packaging.html#github-actions", - "href": "07_packaging.html#github-actions", - "title": "Distributing your Python package", - "section": "GitHub Actions", - "text": "GitHub 
Actions\n\n\n.github/workflows/python-package.yml\n\nname: Publish Python Package\non:\n release:\n types: [created]\njobs:\n deploy:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v2\n - name: Set up Python\n uses: actions/setup-python@v2\n with:\n python-version: '3.10'\n - name: Install dependencies\n run: |\n python -m pip install --upgrade pip\n pip install build\n - name: Build package\n run: python -m build\n \n - name: Publish to PyPI\n env:\n TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n run: |\n twine upload dist/*" + "objectID": "02_function_classes.html#optional-default-arguments", + "href": "02_function_classes.html#optional-default-arguments", + "title": "Functions, classes and modules", + "section": "Optional (default) arguments", + "text": "Optional (default) arguments\ndef f(x, n=2):\n return x**n\n\n>>> f(2)\n4\n>>> f(2, n=3)\n8\n\nMakes it easy to use a function with many arguments." }, { - "objectID": "07_packaging.html#private-pypi-server", - "href": "07_packaging.html#private-pypi-server", - "title": "Distributing your Python package", - "section": "Private PyPI server", - "text": "Private PyPI server\n\nPrivate packages can be be hosted on e.g. 
Azure Arfifacts or Posit Package Manager.\nThese servers behaves like PyPI, and can be used with pip\nAccess policies can be used to control who can install packages.\n\n\nExample:\n$ pip install --extra-index-url https://pkgs.dev.azure.com/dhigroup/_packaging/pond/pypi/simple/ sampling\nLooking in indexes: https://pypi.org/simple, https://pkgs.dev.azure.com/dhigroup/_packaging/pond/pypi/simple/\n...\nSuccessfully installed sampling-0.0.1" + "objectID": "02_function_classes.html#mutable-default-arguments", + "href": "02_function_classes.html#mutable-default-arguments", + "title": "Functions, classes and modules", + "section": "Mutable default arguments", + "text": "Mutable default arguments\nPython’s default arguments are evaluated once when the function is defined, not each time the function is called.\n\ndef add_to_cart(x, cart=[]): # this line is evaluated only once 😮\n cart.append(x)\n return cart\n\n>>> add_to_cart(1, cart=[2])\n[2, 1]\n\n>>> add_to_cart(1)\n[1]\n>>> add_to_cart(2)\n[1, 2]\n\nPython’s default arguments are evaluated once when the function is defined, not each time the function is called (like it is in say, Ruby). This means that if you use a mutable default argument and mutate it, you will and have mutated that object for all future calls to the function as well." }, { - "objectID": "07_packaging.html#installing-a-development-version", - "href": "07_packaging.html#installing-a-development-version", - "title": "Distributing your Python package", - "section": "Installing a development version", - "text": "Installing a development version\n\nInstall latest dev version, e.g. pip install https://github.com/DHI/mikeio/archive/main.zip\nInstall from fix-interp branch, e.g. 
pip install https://github.com/DHI/mikeio/archive/fix-interp.zip" + "objectID": "02_function_classes.html#how-to-use-default-mutable-arguments", + "href": "02_function_classes.html#how-to-use-default-mutable-arguments", + "title": "Functions, classes and modules", + "section": "How to use default (mutable) arguments", + "text": "How to use default (mutable) arguments\ndef add_to_cart_safe(x, cart=None):\n if cart is None:\n cart = [] # this line is evaluated each time the function is called\n cart.append(x)\n return cart" }, { - "objectID": "07_packaging.html#recap", - "href": "07_packaging.html#recap", - "title": "Distributing your Python package", - "section": "Recap", - "text": "Recap\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nTypes, abstraction, and refactoring\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package" + "objectID": "02_function_classes.html#changing-return-types", + "href": "02_function_classes.html#changing-return-types", + "title": "Functions, classes and modules", + "section": "Changing return types", + "text": "Changing return types\nSince Python is a dynamic language, the type of the returned variable is allowed to vary.\ndef foo(x):\n if x >=0:\n return x\n else:\n return \"x is negative\"\n\nBut it is usually a bad idea, since you can not tell from reading the code, which type will be returned." 
}, { - "objectID": "07_packaging.html#git-pull-requests-and-code-reviews", - "href": "07_packaging.html#git-pull-requests-and-code-reviews", - "title": "Distributing your Python package", - "section": "Git, Pull Requests, and code reviews", - "text": "Git, Pull Requests, and code reviews" + "objectID": "02_function_classes.html#changing-return-types-1", + "href": "02_function_classes.html#changing-return-types-1", + "title": "Functions, classes and modules", + "section": "Changing return types", + "text": "Changing return types\ndef is_operable(height, period):\n if height < 10:\n return height < 5.0 and period > 4.0\n else:\n return \"No way!\"\n\n>>> if is_operable(height=12.0, period=5.0):\n... print(\"Go ahead!\")\n...\nGo ahead!\n\n\n\n\n\n\n\nImportant\n\n\nIs this the result you expected?\n\n\n\n\n\nA non-empty string or a non-zero value is considered “truthy” in Python!" }, { - "objectID": "07_packaging.html#github-flow", - "href": "07_packaging.html#github-flow", - "title": "Distributing your Python package", - "section": "Github flow", - "text": "Github flow\n\n\nCreate a branch\nMake changes\nCreate a pull request\nReview\nMerge" + "objectID": "02_function_classes.html#type-hints", + "href": "02_function_classes.html#type-hints", + "title": "Functions, classes and modules", + "section": "Type hints", + "text": "Type hints\nPython is a dynamically typed language -> the type of a variable is determined at runtime.\n\nBut we can add type hints to help the reader (and the code editor).\ndef is_operable(height: float, period: float) -> bool:\n ..." 
}, { - "objectID": "07_packaging.html#github-best-practices", - "href": "07_packaging.html#github-best-practices", - "title": "Distributing your Python package", - "section": "Github best practices", - "text": "Github best practices\n\nCommit often\nUse descriptive commit messages\nKeep pull requests small and focused\nUse “issues” to track work\nReview code regularly" + "objectID": "02_function_classes.html#time-for-a-discussion", + "href": "02_function_classes.html#time-for-a-discussion", + "title": "Functions, classes and modules", + "section": "Time for a discussion", + "text": "Time for a discussion\nDiscuss in learning teams (15 minutes):\n\nIntroduce yourself briefly\nDiscuss your experience with Object Oriented Programming, why are classes useful?\nMention some problems with poorly designed code\n\nYour own experience\nFrom the book\n\n\nAfter break out session:\n\nSelected person from each team briefly presents their discussion" }, { - "objectID": "07_packaging.html#python-functions-classes-and-modules", - "href": "07_packaging.html#python-functions-classes-and-modules", - "title": "Distributing your Python package", - "section": "Python functions, classes, and modules", - "text": "Python functions, classes, and modules" + "objectID": "02_function_classes.html#classes", + "href": "02_function_classes.html#classes", + "title": "Functions, classes and modules", + "section": "Classes", + "text": "Classes\nclass WeirdToolbox:\n tools = [] # class variable ☹️\n\n\n>>> t1 = WeirdToolbox()\n>>> t1.tools.append(\"hammer\")\n>>> t1.tools\n[\"hammer\"]\n\n>>> t2 = WeirdToolbox()\n>>> t2.tools.append(\"screwdriver\")\n>>> t2.tools\n[\"hammer\", \"screwdriver\"]\n\nClass variables are rarely what you want, since they are shared between all instances of the class." 
}, { - "objectID": "07_packaging.html#functions-as-black-boxes", - "href": "07_packaging.html#functions-as-black-boxes", - "title": "Distributing your Python package", - "section": "Functions as black boxes", - "text": "Functions as black boxes\n\n\n\n\nflowchart LR\n A(Input A) --> F[\"Black box\"]\n B(Input B) --> F\n F --> O(Output)\n\n style F fill:#000,color:#fff,stroke:#333,stroke-width:4px\n\n\n\n\n\n\nA function is a black box that takes some input and produces some output.\nThe input and output can be anything, including other functions.\nAs long as the input and output are the same, the function body can be modified." + "objectID": "02_function_classes.html#classes-1", + "href": "02_function_classes.html#classes-1", + "title": "Functions, classes and modules", + "section": "Classes", + "text": "Classes\nclass Toolbox:\n def __init__(self):\n self.tools = [] # instance variable 😃\n\n>>> t1 = Toolbox()\n>>> t1.tools.append(\"hammer\")\n>>> t1.tools\n[\"hammer\"]\n\n>>> t2 = Toolbox()\n>>> t2.tools.append(\"screwdriver\")\n>>> t2.tools\n[\"screwdriver\"]\n\nInstance variables are created when the instance is created, and are unique to each instance." 
}, { - "objectID": "07_packaging.html#naming-conventions---general", - "href": "07_packaging.html#naming-conventions---general", - "title": "Distributing your Python package", - "section": "Naming conventions - general", - "text": "Naming conventions - general\n\nUse lowercase characters\nSeparate words with underscores\n\nmodel_name = \"NorthSeaModel\"\nn_epochs = 100\n\ndef my_function():\n pass" + "objectID": "02_function_classes.html#static-methods", + "href": "02_function_classes.html#static-methods", + "title": "Functions, classes and modules", + "section": "Static methods", + "text": "Static methods\nfrom datetime import date\n\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n>>> dr = Interval(date(2020, 1, 1), date(2020, 1, 31))\n>>> dr.start\ndatetime.date(2020, 1, 1)\n>>> dr.end\ndatetime.date(2020, 1, 31)\n\nHere is an example of a useful class, but it is a bit cumbersome to create an instance." }, { - "objectID": "07_packaging.html#constants", - "href": "07_packaging.html#constants", - "title": "Distributing your Python package", - "section": "Constants", - "text": "Constants\n\nUse all uppercase characters\n\nGRAVITY = 9.81\n\nAVOGADRO_CONSTANT = 6.02214076e23\n\nSECONDS_IN_A_DAY = 86400\n\nN_LEGS_PER_ANIMAL = {\n \"human\": 2,\n \"dog\": 4,\n \"spider\": 8,\n}" + "objectID": "02_function_classes.html#static-methods-1", + "href": "02_function_classes.html#static-methods-1", + "title": "Functions, classes and modules", + "section": "Static methods", + "text": "Static methods\nfrom datetime import date\n\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n @staticmethod\n def from_string(date_string):\n start_str, end_str = date_string.split(\"|\")\n start = date.fromisoformat(start_str)\n end = date.fromisoformat(end_str)\n return Interval(start, end)\n\n>>> dr = Interval.from_string(\"2020-01-01|2020-01-31\")\n>>> dr\n<__main__.Interval at 
0x7fb99efcfb90>\n\nSince we commonly use ISO formatted dates separated by a pipe, we can add a static method to create an instance from a string. This makes it easier to create an instance." }, { - "objectID": "07_packaging.html#classes", - "href": "07_packaging.html#classes", - "title": "Distributing your Python package", - "section": "Classes", - "text": "Classes\n\nUse CamelCase for the name of the class\nUse lowercase characters for the name of the methods\nSeparate words with underscores\n\nclass RandomClassifier:\n\n def fit(self, X, y):\n self.classes_ = np.unique(y)\n\n def predict(self, X):\n return np.random.choice(self.classes_, size=len(X))\n\n def fit_predict(self, X, y):\n self.fit(X, y)\n return self.predict(X)" + "objectID": "02_function_classes.html#dataclasses", + "href": "02_function_classes.html#dataclasses", + "title": "Functions, classes and modules", + "section": "Dataclasses", + "text": "Dataclasses\nfrom dataclasses import dataclass\n\n@dataclass\nclass Interval:\n start: date\n end: date\n\n @staticmethod\n def from_string(date_string):\n start_str, end_str = date_string.split(\"|\")\n start = date.fromisoformat(start_str)\n end = date.fromisoformat(end_str)\n return Interval(start, end)\n\n>>> dr = Interval.from_string(\"2020-01-01|2020-01-31\")\n>>> dr\nInterval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n\nDataclasses are a new feature in Python 3.7, they are a convenient way to create classes with a few attributes. The variables are instance variables, and the class has a constructor that takes the same arguments as the variables." 
}, { - "objectID": "07_packaging.html#dataclasses", - "href": "07_packaging.html#dataclasses", - "title": "Distributing your Python package", - "section": "Dataclasses", - "text": "Dataclasses\nimport datetime\nfrom dataclasses import dataclass\n\n\n@dataclass\nclass Interval:\n start: date\n end: date\n\n>>> dr1 = Interval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr1\nInterval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr2 = Interval(start=datetime.date(2020, 1, 1), end=datetime.date(2020, 1, 31))\n>>> dr1 == dr2\nTrue" + "objectID": "02_function_classes.html#equality", + "href": "02_function_classes.html#equality", + "title": "Functions, classes and modules", + "section": "Equality", + "text": "Equality\nOn a regular class, equality is based on the memory address of the object.\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n>>> dr1 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr2 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr1 == dr2\nFalse\n\nThis is not very useful, since we want to compare the values of the attributes." 
}, { - "objectID": "07_packaging.html#types-abstraction-and-refactoring", - "href": "07_packaging.html#types-abstraction-and-refactoring", - "title": "Distributing your Python package", - "section": "Types, abstraction, and refactoring", - "text": "Types, abstraction, and refactoring" + "objectID": "02_function_classes.html#equality-1", + "href": "02_function_classes.html#equality-1", + "title": "Functions, classes and modules", + "section": "Equality", + "text": "Equality\nclass Interval:\n def __init__(self, start:date, end:date):\n self.start = start\n self.end = end\n\n def __eq__(self, other):\n return self.start == other.start and self.end == other.end\n\n>>> dr1 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr2 = Interval(start=date(2020, 1, 1), end=date(2020, 1, 31))\n>>> dr1 == dr2\nTrue\n\nWe can override the __eq__ method to compare the values of the attributes." }, { - "objectID": "07_packaging.html#pythonic", - "href": "07_packaging.html#pythonic", - "title": "Distributing your Python package", - "section": "Pythonic", - "text": "Pythonic\nIf you want your code to be Pythonic, you have to be familiar with these types and their methods.\nDundermethods:\n\n__getitem__\n__setitem__\n__len__\n__contains__\n…" + "objectID": "02_function_classes.html#data-classes", + "href": "02_function_classes.html#data-classes", + "title": "Functions, classes and modules", + "section": "Data classes", + "text": "Data classes\nfrom dataclasses import dataclass, field\n\n@dataclass\nclass Quantity:\n unit: str = field(compare=True)\n standard_name: field(compare=True)\n name: str = field(compare=False, default=None)\n\n\n>>> t1 = Quantity(name=\"temp\", unit=\"C\", standard_name=\"air_temperature\")\n>>> t2 = Quantity(name=\"temperature\", unit=\"C\", standard_name=\"air_temperature\")\n\n>>> t1 == t2\nTrue\n\n>>> d1 = Quantity(unit=\"m\", standard_name=\"depth\")\n>>> d1 == t2\nFalse" }, { - "objectID": "07_packaging.html#duck-typing", - "href": 
"07_packaging.html#duck-typing", - "title": "Distributing your Python package", - "section": "Duck typing", - "text": "Duck typing\n\n“If it walks like a duck and quacks like a duck, it’s a duck”\nFrom the perspective of the caller, it doesn’t matter if it is a rubber duck or a real duck.\nThe type of the object is not important, as long as it has the right methods." + "objectID": "02_function_classes.html#data-classes-1", + "href": "02_function_classes.html#data-classes-1", + "title": "Functions, classes and modules", + "section": "Data classes", + "text": "Data classes\n\n\nCompact notation of fields with type hints\nEquality based on values of fields\nUseful string represenation by default\nIt is still a regular class" }, { - "objectID": "07_packaging.html#testing-and-auto-formatting", - "href": "07_packaging.html#testing-and-auto-formatting", - "title": "Distributing your Python package", - "section": "Testing and auto-formatting", - "text": "Testing and auto-formatting" + "objectID": "02_function_classes.html#modules", + "href": "02_function_classes.html#modules", + "title": "Functions, classes and modules", + "section": "Modules", + "text": "Modules\nModules are files containing Python code (functions, classes, constants) that belong together.\n$tree analytics/\nanalytics/\n├── __init__.py\n├── date.py\n└── tools.py\n\nThe analytics package contains two modules:\n\ntools module\ndate module" }, { - "objectID": "07_packaging.html#unit-testing", - "href": "07_packaging.html#unit-testing", - "title": "Distributing your Python package", - "section": "Unit testing", - "text": "Unit testing\n\n\n\n\n\n\nDefinition “Unit”\n\n\n\nA small, fundamental piece of code.\nExecuted in isolation with appropriate inputs.\n\n\n\n\n\nA function is typically considered a “unit”\nLines of code within functions are smaller (can’t be isolated)\nClasses are considered bigger (but can be treated as units)" + "objectID": "02_function_classes.html#packages", + "href": 
"02_function_classes.html#packages", + "title": "Functions, classes and modules", + "section": "Packages", + "text": "Packages\n\n\nA package is a directory containing modules\nEach package in Python is a directory which MUST contain a special file called __init__.py\nThe __init__.py can be empty, and it indicates that the directory containing it is a Python package\n__init__.py can also execute initialization code" }, { - "objectID": "07_packaging.html#a-good-unit-test", - "href": "07_packaging.html#a-good-unit-test", - "title": "Distributing your Python package", - "section": "A good unit test", - "text": "A good unit test\n\nFully automated\nHas full control over all the pieces running (“fake” external dependencies)\nCan be run in any order\nRuns in memory (no DB or file access, for example)\nConsistently returns the same result (no random numbers)\nRuns fast\nTests a single logical concept in the system\nReadable\nMaintainable\nTrustworthy" + "objectID": "02_function_classes.html#init__.py", + "href": "02_function_classes.html#init__.py", + "title": "Functions, classes and modules", + "section": "__init__.py", + "text": "__init__.py\nExample: mikeio/pfs/__init__.py:\nfrom .pfsdocument import Pfs, PfsDocument\nfrom .pfssection import PfsNonUniqueList, PfsSection\n\ndef read_pfs(filename, encoding=\"cp1252\", unique_keywords=False):\n \"\"\"Read a pfs file for further analysis/manipulation\"\"\"\n \n return PfsDocument(filename, encoding=encoding, unique_keywords=unique_keywords)\n\nThe imports in __init__.py let you separate the implementation into multiple files.\n>>> mikeio.pfs.pfssection.PfsSection\n<class 'mikeio.pfs.pfssection.PfsSection'>\n>>> mikeio.pfs.PfsSection\n<class 'mikeio.pfs.pfssection.PfsSection'>\n\nThe PfsSection and PfsDocument are imported from the pfssection.py and pfsdocument.py modules into the mikeio.pfs namespace." 
}, { - "objectID": "07_packaging.html#thank-you", - "href": "07_packaging.html#thank-you", - "title": "Distributing your Python package", - "section": "Thank you!", - "text": "Thank you!\n\n\n\nPython package development" + "objectID": "02_function_classes.html#how-generic-should-your-code-be", + "href": "02_function_classes.html#how-generic-should-your-code-be", + "title": "Functions, classes and modules", + "section": "How generic should your code be?", + "text": "How generic should your code be?\n\nA good rule of thumb is to make your code as generic as possible, but no more. 🙄\n\n\n\n\n\nTry to anticipate the future, but not too much!" }, { - "objectID": "index.html", - "href": "index.html", - "title": "Python package development", - "section": "", - "text": "Introduction" + "objectID": "02_function_classes.html#homework-until-next-week", + "href": "02_function_classes.html#homework-until-next-week", + "title": "Functions, classes and modules", + "section": "Homework until next week", + "text": "Homework until next week\n\n\nCreate a repository on GitHub to store your code\nCreate a README.md file describing the purpose of the repository (course project, …)\nWrite a function to calculate the fraction of values within a given range, with the lower bound optional\n\nName, inputs, output, types\n\nCreate a class to represent sensor data, e.g. water levels, temperature measured with a fixed interval\n\nName, attributes, methods?\n\n\n\n\n✨ See you next week! 
🚀\n\n\n\nPython package development" }, { - "objectID": "index.html#learning-modules", - "href": "index.html#learning-modules", - "title": "Python package development", - "section": "Learning modules", - "text": "Learning modules\n\nGit, Pull Requests, and code reviews\nPython functions, classes, and modules\nTypes, abstraction, and refactoring\nTesting and auto-formatting\nDependencies and GitHub actions\nDocumentation\nDistributing your package\n\n©️ DHI 2023" + "objectID": "02b_naming_conventions.html#variables-function-and-method-names", + "href": "02b_naming_conventions.html#variables-function-and-method-names", + "title": "Python - Naming conventions", + "section": "Variables, function and method names", + "text": "Variables, function and method names\n\nUse lowercase characters\nSeparate words with underscores\n\n\nmodel_name = \"NorthSeaModel\"\nn_epochs = 100\n\ndef my_function():\n pass" + }, + { + "objectID": "02b_naming_conventions.html#constants", + "href": "02b_naming_conventions.html#constants", + "title": "Python - Naming conventions", + "section": "Constants", + "text": "Constants\n\nUse all uppercase characters\n\nGRAVITY = 9.81\n\nAVOGADRO_CONSTANT = 6.02214076e23\n\nSECONDS_IN_A_DAY = 86400\n\nN_LEGS_PER_ANIMAL = {\n \"human\": 2,\n \"dog\": 4,\n \"spider\": 8,\n}\n\nPython will not prevent you from changing the value of a constant, but it is a convention to use all uppercase characters for constants." 
+ }, + { + "objectID": "02b_naming_conventions.html#classes", + "href": "02b_naming_conventions.html#classes", + "title": "Python - Naming conventions", + "section": "Classes", + "text": "Classes\n\nUse CamelCase for the name of the class\nUse lowercase characters for the name of the methods\nSeparate words with underscores\n\n\nclass RandomClassifier: # CamelCase ✅\n\n def fit(self, X, y):\n self.classes_ = np.unique(y)\n\n def predict(self, X):\n return np.random.choice(self.classes_, size=len(X))\n\n def fit_predict(self, X, y): # lowercase ✅\n self.fit(X, y)\n return self.predict(X)" + }, + { + "objectID": "02b_naming_conventions.html#function-return-values", + "href": "02b_naming_conventions.html#function-return-values", + "title": "Python - Naming conventions", + "section": "Function return values", + "text": "Function return values\ndef my_function() -> str:\n return \"42\"\n\ndef my_other_function() -> None: # it doesn't return anything 🤔\n print(\"42\")\n\nIn action:\n>>> my_function()\n'42'\n>>> my_other_function()\n42\n>>> x = my_function()\n>>> x\n'42'\n>>> y = my_other_function()\n>>> y" + }, + { + "objectID": "02b_naming_conventions.html#github-repo-naming-convention", + "href": "02b_naming_conventions.html#github-repo-naming-convention", + "title": "Python - Naming conventions", + "section": "Github repo naming convention", + "text": "Github repo naming convention\n\nThis is just a suggestion\nUse lowercase characters\nSeparate words with dashes\n\nExample: my-awesome-repo\n\n\n\nPython package development" } ] \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index fb14df5..6c0e193 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -1,43 +1,47 @@ - https://github.com/DHI/python-package-development/00_introduction.html - 2023-10-18T07:38:07.873Z + https://github.com/DHI/python-package-development/index.html + 2023-10-19T08:52:22.268Z - https://github.com/DHI/python-package-development/02b_naming_conventions.html - 2023-10-18T07:38:04.405Z 
+ https://github.com/DHI/python-package-development/00_introduction.html + 2023-10-19T08:52:21.576Z - https://github.com/DHI/python-package-development/02_function_classes.html - 2023-10-18T07:38:03.069Z + https://github.com/DHI/python-package-development/01_version_control.html + 2023-10-19T08:52:20.568Z - https://github.com/DHI/python-package-development/03_types_protocols.html - 2023-10-18T07:38:01.601Z + https://github.com/DHI/python-package-development/07_packaging.html + 2023-10-19T08:52:19.336Z - https://github.com/DHI/python-package-development/05_dependencies_ci.html - 2023-10-18T07:38:00.021Z + https://github.com/DHI/python-package-development/04_testing.html + 2023-10-19T08:52:17.784Z https://github.com/DHI/python-package-development/06_documentation.html - 2023-10-18T07:37:58.029Z + 2023-10-19T08:52:14.156Z - https://github.com/DHI/python-package-development/04_testing.html - 2023-10-18T07:38:00.829Z + https://github.com/DHI/python-package-development/course_structure.html + 2023-10-19T08:52:16.028Z - https://github.com/DHI/python-package-development/01_version_control.html - 2023-10-18T07:38:02.257Z + https://github.com/DHI/python-package-development/05_dependencies_ci.html + 2023-10-19T08:52:18.560Z - https://github.com/DHI/python-package-development/07_packaging.html - 2023-10-18T07:38:03.977Z + https://github.com/DHI/python-package-development/03_oop.html + 2023-10-19T08:52:20.032Z - https://github.com/DHI/python-package-development/index.html - 2023-10-18T07:38:06.457Z + https://github.com/DHI/python-package-development/02_function_classes.html + 2023-10-19T08:52:21.288Z + + + https://github.com/DHI/python-package-development/02b_naming_conventions.html + 2023-10-19T08:52:21.872Z