Python
Getting Started
Install
Python
- for Windows: get and install Python from https://www.python.org
- for MacOS: follow this guide instead: https://opensource.com/article/19/5/python-3-default-mac
brew install pyenv pyenv install 3.10.10 pyenv global 3.10.10 vim ~/.zshrc # add if command -v pyenv 1>/dev/null 2>&1; then eval "$(pyenv init -)" fi
Editor: Visual Studio Code
excellent and free source-code editor that supports many languages.
- get and install from https://code.visualstudio.com
See Wickie page Visual_Studio_Code for general setup, extension, config...
Python Extensions
- Python
- Pylance
- Black Formatter
- isort
- Ruff
Settings (CTRL + ,)
- Extensions -> Python -> Formatting: Provider = black
- Text Editor -> Editor: Format On Save
- Text Editor -> Files:Eol -> \n
- Linter:
Ruff (replaces flake8 and pylint)
Settings in settings.json
"python.analysis.completeFunctionParens": true, "python.analysis.autoImportCompletions": true, "python.analysis.inlayHints.functionReturnTypes": true, "python.analysis.typeCheckingMode": "strict", "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true, "editor.codeActionsOnSave": { // disable autofix by ruff "source.fixAll": false, // activate isort "source.organizeImports": true }, }, // Python isort "isort.args": [ "--profile", "black" ], // use isort instead "ruff.organizeImports": false
Run your code
CTRL + F5 : run F5 : run in debugger
Standard Template
see header documentation example 1 and example 2 see Google Python Styleguide
#!/usr/bin/env python3.10 # by Torben Menke https://entorb.net """ Here comes the docstring containing the description of this piece of software """ # Built-in/Generic Packages import os # External Packages # import openpyxl print("Moin Moin")
Object Oriented Template
#!/usr/bin/env python3.10
# by Torben Menke https://entorb.net


class myDevice:
    """
    Here comes the docstring containing the description of this class
    """

    def __init__(self, devicename: str = "", verbose: bool = False) -> None:
        # name of the device (e.g. for log messages)
        self.devicename = devicename
        # whether to log information or be quiet
        self.verbose = verbose

    def log(self, msg: str) -> None:
        """Print msg to stdout."""
        print(msg)


class SMU236(myDevice):
    """
    Here comes the docstring containing the description of this class
    """

    def __init__(
        self, gpibaddress: float, devicename: str = "", verbose: bool = False
    ) -> None:
        # let the base class store devicename and verbose
        super().__init__(devicename, verbose)
        self.gpibaddress = gpibaddress


if __name__ == "__main__":
    SMU = SMU236(1234)
    SMU.log("Starting")
Basics
Naming Conventions
module_name, package_name, ClassName, method_name, ExceptionName, function_name, GLOBAL_CONSTANT_NAME, global_var_name, instance_var_name, function_parameter_name, local_var_name
linting / code formatting
use software for handling the code formatting like "black"
pip install black
then activate it in your editor, e.g. VS Code
Installing packages
python -m pip install --upgrade pip pip install somemodule # or pip3 install somemodule # or read from file pip install -r requirements.txt # uninstall pip uninstall somemodule # using a web proxy # set proxy for windows cmd session SET HTTPS_PROXY=http://myProxy:8080 (afterwards --proxy setting below no longer required or pip install --proxy http://myProxy:8080 somemodule # list outdated packages pip list --outdated # update package pip install --upgrade pyinstaller # updating all via Windows Powershell (from [1]) pip freeze | %{$_.split('==')[0]} | %{pip install --upgrade $_} # updating all via Bash (from [2]) pip freeze | grep -v '^\-e' | cut -d = -f 1 | xargs -n1 pip install --upgrade # downgrade pip install --upgrade pandas==1.2.4
Variables
del var # delete / undef a variable var = None # sets to null # check if variable is defined if "var" in locals(): pass # for object oriented projects: if "var" in self.__dict__.keys(): pass
Access global variables in functions
var = 123 def test(): global var # point to global instead of creation of local var var = 321
Strings
# num <-> str s = str (i) # int to string f = float(s) # str -> float i = int(s) str(round(f, 1)) # round first # tests s.isdigit() # 0-9 # note isdecimal() does also not match '1.1' # printf: 1 digit s = "{:0.1f}".format(value) s = "%0.1f" % value
Modify Strings
# get string from prompt s = input("Enter Text: ") s = s.strip() # trim spaces from both sides, rstrip for right only s = s.lower() # lower case s = s.upper() # upper case s = s.title() # upper case for first char of word # upper case first letter of each word and also removes multiple and trailing spaces import string s = string.capwords(s) # replace s.replace(x, y) # trim whitespaces from left and right s.strip() # replace all (multiple) whitespaces by single space ' ' s = " ".join(s.split()) # generate key value pairs from dict # key1=value1&key2=value2 param_str = "&".join("=".join(tup) for tup in dict.items()) # repeat string multiple times s * 5 # = s+s+s+s+s
substrings
# find a substring: `x in s` evaluates to True / False
# check for a non-empty string: `if len(s) > 0`

# handling substrings
a = "abcd"
b = a[:1] + "o" + a[2:]  # -> 'aocd'

myString = "Hello there !bob@"
i1 = myString.find("!") + 1
i2 = myString.find("@")
mySubString = myString[i1:i2]  # -> 'bob'


def substr_between(s: str, s1: str, s2: str) -> str:
    """
    Return the part of s between the first occurrence of s1 and the first
    occurrence of s2.

    Raises AssertionError if s1 or s2 is not contained in s, or if there is
    no text between the end of s1 and the start of s2.
    """
    assert s1 in s, f"E: can't find '{s1}' in '{s}'"
    assert s2 in s, f"E: can't find '{s2}' in '{s}'"
    i1 = s.find(s1) + len(s1)  # index right after s1
    i2 = s.find(s2)  # index where s2 starts
    assert i1 < i2, f"E: '{s1}' not before '{s2}' in '{s}'"
    return s[i1:i2]
Binary, formatted, raw strings
# Binary Strings
key = b"asdf"
# or
key = str.encode("asdf")
s = key.decode("ascii")  # decode binary strings
key = s.encode("ascii")  # encode string to binary

# Formatted string
s = f""

# raw string: no escape of \ needed
# (a raw string literal must not END with a backslash, r'c:\Windows\' is a
# syntax error; append "\\" separately if a trailing backslash is required)
s = r"c:\Windows"

# convert utf-8 to html umlaute
lk_name = "Nürnberg".encode("ascii", "xmlcharrefreplace").decode()
# -> N&#252;rnberg
merge variables in string / sprintf
i = 7  # example value
print("Renner =", i)
print("Renner = %03d" % i)  # leading 0's -> 'Renner = 007'
print(f"Renner = {i}")

# place formatted numbers in a string / sprintf
"The %03i %s cost %f euros" % (3, "beers", 11.50)
# -> 'The 003 beers cost 11.500000 euros'
"The length is %.2f meters" % 72.8958
# -> 'The length is 72.90 meters'

percent = 12.34  # example value
p = "%.1f%%/min" % percent  # -> '12.3%/min'
Lists
like arrays in Perl
L = [1,2,3,4,5,6] L = [x for x in range(10)] L = "Word1 Word2 Word3".split() # split by spaces, like QW in Perl, use split(",") to split on "," len(L) L[0:10] # get elements 0-10 for a in L: ...
ATTENTION: "=" does not create a clone but a second reference (link) to the same list
M = L  # M's elements are links to L's
# clones can be achieved via:
M = L.copy()  # clones L (note the parentheses: L.copy alone is just the method)
M = L[:]  # clones L
M = list(L)  # clones L
L.append(x) # append a single element L.extend(M) # put elements of list M to the end of List L L.insert(i, x) # insert item x at position int i L.pop() # returns and removes the last item L.pop(i) # returns and removes the item at position int i L.reverse() L = sorted (L, key=str.casefold) # case insentitive / ignore case L.remove(x) # removes the first occurrence of item x L.count(x) # how many items x are in the list i = L.index("word") # find in list / returns the position of the first match in list s = "".join(L) x in L x not in L # search for first match in list: i = l_cont.index(1234) line_footer = l_cont.index("") # list to string s = "\n".join(L) # string to list L = s.split("\n")
Initiate an "empty" list of certain length: consisting of certain number of None elements:
l = [None] * 10
reverse order
lines.reverse()
zip: merge 2 lists to list of tuples
data = list(zip(data_x, data_y))
unzip: split list of pairs into 2 lists
data_x, data_y = zip(*data)
trim/strip spaces for each element
lines = [s.strip() for s in lines]
filter list
lines = [x for x in lines if not x.startswith("word")]
Cartesian product of lists / tuples
import itertools for i in itertools.product(*listOfLists): print(i) # remove duplicate values from list myList = list(dict.fromkeys(myList)) # via set myUniqueValues = set(myDict.values())
Filter List
# filter on element contains string
l1 = ["asdf", "asdf2", "qwertz"]
l2 = [elem for elem in l1 if "asdf" in elem]  # -> ['asdf', 'asdf2']
list -> unique list
mySet = set(myList)
Multi-Level-Loop
- hard coded 3 levels:
for i3 in range(20): for i2 in range(20): for i1 in range(20): l = i1, i2, i3 print(l)
- general approach:
import itertools as it for tup in it.product(range(20), repeat=3): print(tup)
Looping over lists
modify each item in list by adding constant string
l = [s + ';' + v for v in l]
modify item in list
for idx, line in enumerate(cont): if "K1001/1" in line: line = "K1001/1 Test Nr " + str(i) + "\n" cont[idx] = line break
remove empty values from end
while L[-1] == "": # L = L[0:-1] L.pop()
modify or even remove certain items
# from https://stackoverflow.com/a/6024599 # iterates in-situ via index and reversing for i in range(len(somelist) - 1, -1, -1): element = somelist[i] do_action(element) if check(element): del somelist[i]
Multi Dim Lists
lAllPoints = []
# append() is a method and must be called; assigning to it raises AttributeError
lAllPoints.append(["a", "b", "c"])
# or using tuple
lAllPoints.append(("a", "b", "c"))
sort multidim list
lAllPoints = sorted(lAllPoints, key=lambda x: x[0], reverse=False) data_all = sorted(data_all, key=lambda row: (row["Wann"], row["Wer"]), reverse=False)
Tuples
Ordered sequence, with no ability to replace or delete items
L = (1,2,3,4,5,6)
list -> tuple
l = tuple(l)
combine 2 tuples
l = la + lb
Dictionaries
like hash in Perl
d = {"keyx": x, "keyy": y} d["keyz"] = z d.keys() ["keyx", "keyy", "keyz"] del d["keyy"] len(d) d.clear() d.copy() d.keys() d.values() d.items() # returns a list of tuples (key, value) d.get(k) # returns value of key k d.get(k, x) # returns value of key k; if k is not in d it returns x d.pop(k) # returns and removes item k d.pop(k, x) # returns and removes item k; if k is not in d it returns x x in d x not in d # loop over all keys and retrieve there values as well for key, value in d.items(): print(f"{key} = {value}") # sort keys: for userid in sorted(dict.keys()): pass # sort values reversed for id, value in sorted(d.items(), key=lambda item: item[1], reverse=True): pass join / merge 2 dicts d.update(d2)
MultiDim Dictionaries
dicProductivity = {} dicProductivity["Cursor"] = {} dicProductivity["Cursor"]["Nr"] = 1 dicProductivity["Cursor"]["Prod"] = 1.909e18 dicProductivity["Cursor"]["Cost"] = 0 dicProductivity["Cursor"]["Img"] = "templates/Shop01Cursor.png" dicProductivity["Grandma"] = {} dicProductivity["Grandma"]["Nr"] = 2 dicProductivity["Grandma"]["Prod"] = 1.725e18 dicProductivity["Grandma"]["Cost"] = 0 dicProductivity["Grandma"]["Img"] = "templates/Shop02Grandma.png" for k in dicProductivity.keys(): print(k) if "Img" in dicProductivity[k]: print("ja")
Alternatively one can use a tuple as key for dictionary:
d = fetchDataAsDict() myTuple = (d["description"], d["meaning"], d["source"], d["fileName"]) dict_with_tuple_as_key[myTuple] = value
Loops
for / while controls
break = exit loop continue = cancel current iteration and go to start of next iteration
ATTENTION: The loops do not create a new variable scope. Only functions and modules introduce a new scope!
for i in range (10): print(i) del (i)
while i <= 100:
    i += 1
    ...
    if sth:
        break  # exit loop

for i in range(1, 5):
    print(i)
    if sth:
        continue  # go to start of next iteration

for f in list:
    ...
inline if (requires a dummy else):
print("something") if self.verbose else 0
methods/functions
example: def get_labeled_exif(exif: dict) -> dict: """converts the exif key IDs into strings and returns that readable dict""" labeled = {} for (key, val) in exif.items(): labeled[TAGS.get(key)] = val return labeled
TypeHints
from typing import Any, Dict, cast creds = cast(dict[str, str], tomllib.load(f)) # type: ignore o = cast(Dict[str, Any], tomllib.load(f)) # type: ignore o["sap"] = cast(Dict[str, str], o["sap"]) o["settings"] = cast(Dict[str, str | int | bool], o["settings"]) o["settings"]["sleep_time"] = cast(int, o["settings"]["sleep_time"])
Pylance/Pyright
import tomllib # shows warning: Import "xyz" could not be resolved # fix by import tomllib # pyright: ignore
asserts function argument validation
from the Python course by Carsten Knoll
def eine_funktion(satz, ganzzahl, zahl2, liste):
    """
    Demonstrates different ways of validating function arguments.

    Returns -1 if satz is not a str, -2 if ganzzahl is not an int and
    -3 if liste is neither a tuple nor a list (a message is printed in each
    case). Raises AssertionError if zahl2 is not > 0.
    Returns None if all checks pass.
    """
    if not isinstance(satz, str):
        print("Datentpyfehler: satz")
        return -1
    if not isinstance(ganzzahl, int):
        print("Datentpyfehler: ganzzahl")
        return -2
    if not isinstance(liste, (tuple, list)):
        print("Datentpyfehler: liste")
        return -3
    # Most compact variant (recommended):
    # raises an AssertionError if the condition is not fulfilled
    assert zahl2 > 0, "Error: zahl2 ist nicht > 0"
def F(x): if not isinstance(x, (float, int)): msg = "Zahl erwartet, %s bekommen" % type(x) raise ValueError(msg) return x**2
better:
def F(x): assert isinstance(x, (float, int)), "Error: x is not of type float or int" return x**2
assert variant in [ "normal", "gray", "cannyedge", ], "Error: variant is not in 'normal', 'gray', 'cannyedge'"
Imports
import sys import datetime import time import math import random import os.path # Import my files import MyFile # without tailing .py # import a file, not stored in the same folder import sys sys.path.append("../libs/MyFile ")
Math
see Python - Math for linear regression
Python 2: get rid of the annoying integer division: [3]
from __future__ import division
Modulo
15 % 4 --> 3
Random
import random random.randint(1000000, 9999999)
Datetime / Date and Time
import datetime as dt # from datetime import date, datetime, timedelta
Create
# date
date = dt.date(2023, 12, 31)
# ISO calendar: argument order is (year, week, weekday); daynum: 1..7
date = dt.date.fromisocalendar(int(year), int(week), int(daynum))
date = dt.date.fromisoformat("2020-03-10")
date_today = dt.date.today()
date_yesterday = dt.date.today() - dt.timedelta(days=1)

# datetime
dt_now = dt.datetime.now()
my_dt = dt.datetime(2023, 12, 31, 14, 31, 56)  # 2023-12-31 14:31:56
my_dt = dt.datetime.fromtimestamp(myTimestamp)
my_dt = dt.datetime.fromisoformat("2017-01-01T12:30:59.000000")
my_dt = dt.datetime.fromisoformat("2020-03-10 06:01:01+00:00")
s = "2020-03-10T06:01:01Z"
my_dt = dt.datetime.fromisoformat(s.replace("Z", "+00:00"))

# datetime -> date
my_date = my_dt.date()

# date -> datetime
my_date = dt.date(2023, 12, 31)
my_dt = dt.datetime(my_date.year, my_date.month, my_date.day)
timezones
In Python >=3.9 use ZoneInfo instead of pytz!
from zoneinfo import ZoneInfo TZ_UTC = dt.timezone.utc # or TZ_UTC = ZoneInfo("UTC") TZ_DE = ZoneInfo("Europe/Berlin") TZ_ES = ZoneInfo("Europe/Madrid") # add local timezone of data file dt1 = dt.datetime(2023,12,31,13,45,59) # add timezone dt1 = dt1.replace(tzinfo=TZ_ES) # convert to other timezone dt2 = dt1.astimezone(TZ_UTC) # overwrite (not convert) timezone dt2 = dt1.replace(tzinfo=TZ_DE) # dt from timestamp and setting the timezine dt1 = dt.datetime.fromtimestamp(timestamp, tz=TZ_UTC) dt_now_utc = dt.datetime.now(dt.timezone.utc)# dt_utc = dt.datetime(2023, 1, 4, 12, 23, 34, tzinfo=dt.timezone.utc)
to string
datestr = dt.date.today().strftime("%y%m%d")
datestr = dt.datetime.today().strftime("%Y-%m-%d %H:%M:%S")
# now in UTC without milliseconds
# (datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
# tzinfo so that isoformat() does not append "+00:00" before the "Z")
datestr = (
    dt.datetime.now(dt.timezone.utc).replace(microsecond=0, tzinfo=None).isoformat()
    + "Z"
)
German format
import locale locale.setlocale(locale.LC_ALL, "de_DE") print(my_date.strftime("%a %x")) # Mo 25.12.2023
Calendar week
week = day.isocalendar()[1] print("KW%02d" % week)
rounding datetime
from datetime import datetime, timedelta


def floor_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """
    floor (=round down) minutes to X min resolution
    """
    minNew = res * (dt.minute // res)
    return dt.replace(minute=minNew, second=0, microsecond=0)


def ceil_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """
    ceil (=round up) minutes to X min resolution

    A value that already lies exactly on the resolution grid is returned
    unchanged.
    """
    if dt.minute % res == 0 and dt.second == 0 and dt.microsecond == 0:
        return dt
    minNew = res * (1 + dt.minute // res)
    # start from the full hour and add minutes, so that minNew == 60 rolls
    # over into the next hour instead of raising in replace()
    return dt.replace(minute=0, second=0, microsecond=0) + timedelta(minutes=minNew)


def round_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """
    round minutes to X min resolution
    """
    minOldDec = float(dt.minute) + float(dt.second) / 60
    minNew = res * round(minOldDec / res)
    # start from the full hour and add minutes to handle a roll-over to 60
    return dt.replace(minute=0, second=0, microsecond=0) + timedelta(minutes=minNew)


dt = datetime.fromisoformat("2020-03-10 06:01:01+00:00")
print(f"original: {dt}")
print(f"floored: {floor_dt_minutes(dt,5)}")
print(f"ceileded: {ceil_dt_minutes(dt,5)}")
print(f"rounded: {round_dt_minutes(dt,5)}")
first day of quarter
def get_first_day_of_the_quarter(p_date: dt.date) -> dt.date:
    """Return the first day of the calendar quarter that contains p_date."""
    # 0-based quarter index: Jan-Mar -> 0, Apr-Jun -> 1, Jul-Sep -> 2, Oct-Dec -> 3
    quarter_index = (p_date.month - 1) // 3
    first_month = quarter_index * 3 + 1
    return dt.date(p_date.year, first_month, 1)
timing
measure time elapsed
import time timestart = time.time() ... print(time.time() - timestart)
calculate time
import time

duration = 1234  # sec
print("ETA =", time.ctime(time.time() + duration))
array = time.localtime(time.time() + duration)
Exceptions
Catch keyboard interrupt and do a "safe exit"
i = 0
# the with-block guarantees that the file is closed, however the loop ends
with open("out.txt", "w") as FILE:
    try:
        while True:
            i += 1
            print(i)
    except KeyboardInterrupt:
        # CTRL+C ends the loop; leaving the with-block closes FILE
        pass
Catch all exceptions
try: [...] except Exception as e: print("Exception: ", e)
Custom Exceptions
try: raise Exception("HiHo")
Math: curve fitting
from[4]
import numpy as np # curve-fit() function imported from scipy from scipy.optimize import curve_fit from matplotlib import pyplot as plt # Test function with coefficients as parameters def fit_function(x, a, b): return a * np.exp(b * x) p0 = [data_y[-1], 0.14] # initial guess of parameters param, param_cov = curve_fit( fit_function, data_x, data_y, p0, bounds=((-np.inf, -np.inf), (np.inf, np.inf)) ) print(f"Coefficients:\n{param}") print(f"Covariance of coefficients:\n{param_cov}") data_y_fit = [] for x in data_x: y = fit_function(x, param[0], param[1]) data_y_fit.append(y) plt.plot(data_x, data_y, "o", color="red", label="data") plt.plot(data_x, data_y_fit, "--", color="blue", label="fit") plt.legend() plt.show()
Regular Expressions
See [5]
See [6] for an online tester
multiple flags are joined via pipe |
s = re.sub("asdf.*", r"qwertz", s, flags=re.DOTALL | re.IGNORECASE)
Lookahead and Lookbehind
pos lookahead: (?=...) neg lookahead: (?!...) pos lookbehind (?<=...) neg lookbehind (?<!...)
matching
import re

# V0: simple 1
# raw string, so that \* reaches the regex engine without an invalid-escape warning
myPattern = r"(/\*\*\* 0097_210000_0192539580000_2898977_0050 \*\*\*/.*?)($|/\*\*\*)"
myRegExp = re.compile(myPattern, re.DOTALL)
myMatch = myRegExp.search(cont)
assert myMatch is not None, f"golden file not found in file {filename}"
cont_golden = myMatch.group(1)

# V1: simple 2
assert (
    re.match("^[a-z]{2}$", d_settings["country"]) is not None
), f'Error: county must be 2 digit lower case. We got: {d_settings["country"]}'
Match email
def checkValidEMail(email: str) -> bool: # from https://stackoverflow.com/posts/719543/timeline bottom edit if not re.fullmatch(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", email): print("Error: invalid email") quit() return True
Find all
myMatches = re.findall('href="([^"]+)"', cont) for myMatch in myMatches: print(myMatch)
substring
import re

# simple via search
# raw string, so that \d and \. reach the regex engine without an invalid-escape warning
lk_id = re.search(r"^.*timeseries\-(\d+)\.json$", f).group(1)

# simple via sub
myPattern = "^.*" + s1 + "(.*)" + s2 + ".*$"
out = re.sub(myPattern, r"\1", s)


# more robust including an assert
def substr_between(s: str, s1: str, s2: str) -> str:
    """
    returns substring of s, found between strings s1 and s2
    s1 and s2 can be regular expressions

    Raises AssertionError if the pattern s1(.*)s2 is not found in s.
    """
    myPattern = s1 + "(.*)" + s2
    myRegExp = re.compile(myPattern)
    myMatches = myRegExp.search(s)
    assert myMatches is not None, f"E: can't find '{s1}'...'{s2}' in '{s}'"
    out = myMatches.group(1)
    return out
matchObj = re.search(r"(\d+\.\d+)", text)
if matchObj:
    # group(0) is the whole matched text, e.g. "12.50"
    price = float(matchObj.group(0))
Naming of match groups
(?P<name>...), see [7]
Search and Replace
From [8]
re.sub(regex, replacement, str) performs a search-and-replace across str, replacing all matches of regex in str with replacement. The result is returned by the sub() function. The str string you pass is not modified.
s = re.sub(" +", " ", s)
Splitting
From [9]
split() splits a string into a list delimited by the passed pattern. The method is invaluable for converting textual data into data structures that can be easily read and modified by Python as demonstrated in the following example that creates a phonebook.
First, here is the input. Normally it may come from a file, here we are using triple-quoted string syntax:
>>> input = """Ross McFluff: 834.345.1254 155 Elm Street ... ... Ronald Heathmore: 892.345.3428 436 Finley Avenue ... Frank Burger: 925.541.7625 662 South Dogwood Way ... ... ... Heather Albrecht: 548.326.4584 919 Park Place"""
The entries are separated by one or more newlines. Now we convert the string into a list with each nonempty line having its own entry:
>>> entries = re.split("\n+", input) >>> entries ['Ross McFluff: 834.345.1254 155 Elm Street', 'Ronald Heathmore: 892.345.3428 436 Finley Avenue', 'Frank Burger: 925.541.7625 662 South Dogwood Way', 'Heather Albrecht: 548.326.4584 919 Park Place']
Finally, split each entry into a list with first name, last name, telephone number, and address. We use the maxsplit parameter of split() because the address has spaces, our splitting pattern, in it:
>>> [re.split(":? ", entry, 3) for entry in entries] [['Ross', 'McFluff', '834.345.1254', '155 Elm Street'], ['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'], ['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'], ['Heather', 'Albrecht', '548.326.4584', '919 Park Place']]
replace cont by linebreaks
def replace_cont_by_linebreaks(s: str, regex: str) -> str: """ Replace regex in s by the number of linebreaks it originally contained. """ myMatches = re.findall(regex, s, flags=re.DOTALL) for match in myMatches: linebreaks = match.count("\n") s = s.replace(match, "\n" * linebreaks, 1) return s
perl grep and map
from [10]
def grep(list, pattern):
    """Return the elements of list that match the regex pattern at their start."""
    expr = re.compile(pattern)
    return [elem for elem in list if expr.match(elem)]


# or, for conditions that are not regular expressions, use the built-in filter():
# filteredList = filter(lambda x: x < 7 and x > 2, unfilteredList)


def map(list, was, womit):
    """
    Return a new list in which, for every element, all matches of the regex
    `was` are replaced by `womit`.
    """
    # The parameter `list` and this function's own name `map` shadow the
    # built-ins of the same name, so neither list(...) nor the built-in
    # map(...) can be used in here: build the result with a comprehension.
    return [re.sub(was, womit, i) for i in list]


# was = '.*"(\d+)".*'
# womit = r"\1"
unit testing using pytest
install via
pip install pytest
activate in vscode, see [11]: To enable testing, use the Python: Configure Tests command on the Command Palette.
see https://docs.pytest.org/en/6.2.x/assert.html#assert
test_1.py:
import myLib # my custom lib to test class TestClass: def test_one(self): x = "this" assert "h" in x def test_two(self): assert myLib.multiply(1, 2) == 2 def test_three(self): assert myLib.multiply(2, 2) == 2
Sleep / Wait for input
sleep for a while
import time time.sleep(60)
wait for user input
input("press Enter to close")
Suppress Warnings
see [12]
import warnings warnings.filterwarnings("ignore", message=".*native_field_num.*not found in message.*")
File Access
Pathlib
for migrating see table [13]
split to dir, filename, ext
from pathlib import Path my_file_str = "/tmp/dir/test.txt" p = Path("/tmp/dir/test.txt") my_file_name_with_ext = Path(my_file_str).name # alternative to basename my_file_name_without_ext = Path(my_file_str).stem my_dir = Path(my_file_str).parent my_parent_dir = Path(my_file_str).parents[1] my_ext = Path(my_file_str).suffix my_file2 = Path(p).with_suffix(".json") my_file3 = p.parent / (p.stem + "-autofix.tex")
alternative using old os functions Split path into folder, filename, ext
import os (dirName, fileName) = os.path.split(f) (fileBaseName, fileExtension) = os.path.splitext(fileName) fileOut = os.path.splitext(fileIn)[0] + "-out.txt"
check if exists
Path(my_file_str).is_dir() Path(my_file_str).is_file()
cont = p.read_text() p.write_text(cont)
loop over glob of files matching wildcard
for fileOut in Path("mydir").glob("*-autofix.tex"):
Checking Operating System
import os import sys if os.name == "posix": print("posix/Unix/Linux") elif os.name == "nt": print("windows") else: print("unknown os") sys.exit(1) # throws exception, use quit() to close / die silently
Get filename of python script
my_file_path = __file__
alternative using sys package
from sys import argv myFilename = argv[0]
accessing os environment variables
import os print(os.getenv("tmp"))
Command Line Arguments
ArgumentParser
import argparse parser = argparse.ArgumentParser() # construct the argument parser and parse the arguments # -h comes automatically # Boolean Parameter parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") # store_true -> Boolean Value # Choice Parameter # restrict to a list of possible values / choices # parser.add_argument("--choice", type=int, choices=[0, 1, 2], help="Test choices") # Positional Parameter (like text.py 123) # parser.add_argument("num", type=int, help="Number of things") # Required Parameter # parser.add_argument("-i", "--input", type=str, required=True, help="Path of input file") # Optional Parameter parser.add_argument("-n", "--number", type=int, help="Number of clicks") # Optional Parameter with Default parser.add_argument("-s", "--seconds", type=int, default=secDefault, help="Duration of clicking, default = %i (sec)" % secDefault) args = vars(parser.parse_args()) if args["verbose"]: pass # do nothing # print ("verbosity turned on") if args["number"]: print("num=%i" % args["number"])
match case statement
(new in python 3.10)
match args: case {"sap": "prod", "version": 1}: ... case {"sap": "prod", "version": 2}: ... case _: # default case
File Modifications
Copy File
shutil.copyfile(fileTemp, os.path.join(dest_path, fileOut))
Move/Rename file
via pathlib
# rename/move file.txt to file2.txt
Path("file.txt").replace(Path("file2.txt"))
via os
os.rename(fileIn, fileDone)
Delete file
via pathlib
Path("file.txt").unlink()
via os
os.remove("file.txt")
Touch File
if os.path.exists(fname): os.utime(fname, None) else: open(fname, "w").close()
File Meta Data
Get file size
import os int (os.path.getsize("moinsen.txt") )
Read Timestamp (last modified)
lasttime = os.path.getmtime(fname)
Directories / Folders
Cross platform paths
currentdir = os.curdir mysubdir = os.path.join(currentdir, "mysubdir")
Create Dir
# via pathlib from pathlib import Path Path("myDir1/myDir2").mkdir(parents=True, exist_ok=True) # via os os.makedirs(dir, exist_ok=True) # recursively: with all parents # or manually if not os.path.isdir(dir) : os.mkdir(dir)
Delete folder+contents
import shutil shutil.rmtree(d)
Fetch Dir Contents / Loop over Files
Simple
Get list of files in directory, filter dirs from list, filter by ext
dir= "/path/to/some/dir" listoffiles = [ f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir ,f)) and f.lower()[-4:] == ".gpx"] listoffiles.sort()
glob via pathlib
for fileOut in Path("mydir").glob("*-autofix.tex"):
    print(fileOut)
alternative loop via glob
import glob, os os.chdir("/mydir") for f in glob.glob("*.txt"): print(f)
or simply os.listdir:
import os for f in os.listdir("/mydir"): if f.endswith(".txt"): print(os.path.join("/mydir", f))
new in 3.5: scandir
for f in os.scandir('./'): if f.is_file(): (filename, fileext) = os.path.splitext(f.name)
Traverse in Subdirs
# walk into path an fetch all files matching extension jpe?g files = [] for (dirpath, dirnames, filenames) in os.walk("."): dirpath = dirpath.replace("\\", "/") for file in filenames: if file.endswith(".txt"): files.append(dirpath + "/" + file) elif re.search(r"\.jpe?g$", file, re.IGNORECASE): files.append(dirpath + "/" + file)
File Parsing
File General
File Read
Check if file / dir exists
import os.path # os.path - The key to File I/O os.path.exists("text.txt") os.path.isfile("text.txt") os.path.isdir("text") os.path.isabs("/home/torben/text.txt") # Is it an absolute path
- using pathlib
cont = file_in.read_text(encoding="utf-8")
with open(file=file_in, encoding="utf-8") as fh: cont = fh.read() # or list = fh.readlines() # or line = fh.readline() # or for line in fh: print(line) fh = open(file=file_in, encoding="utf-8") ... fh.close()
File Write
file_out = "out/1/out.txt"
(file_path, file_name) = os.path.split(file_out)
# (file_base_name, file_extension) = os.path.splitext(file_name)
os.makedirs(file_path, exist_ok=True)  # = mkdir -p
with open(file=file_out, mode="w", encoding="utf-8", newline="\n") as fh:
    # w = overWrite file ; a = append to file
    # If running Python in Windows, "\n" is automatically replaced by "\r\n". To prevent this use newline='\n'
    fh.writelines(list)  # no linebreaks
    # or
    fh.write("\n".join(list))
    # or
    for line in list:
        fh.write(line)
    # Force update of filecontents without closing it
    fh.flush()

# alternative
fh = open(file=file_out, mode="w", encoding="utf-8", newline="\n")
...
fh.close()
INI Config File Reading
Config.ini
[Section1] Cursor = 205E18 Grandma = 18E18 Farm = 11E18 Mine = 514E18 Factory = 155E18
test.py
from configparser import ConfigParser config = ConfigParser( interpolation=None ) # interpolation=None -> treats % in values as char % instead of interpreting it config.read("Config.ini", encoding="utf-8") print(config.getint("Section1", "key1")) print(config.getfloat("Section1", "key2")) print(config.get("Section1", "key3")) for sec in config.sections(): d_settings = {} for key in config.options(sec): value = config.get(sec, key) d_settings[key] = value print("%15s : %s" % (key, value))
CSV
CSV Read
import csv # note: utf-8-sig for UTF-8-BOM with open("data/ref_selected_countries.csv", mode="r", encoding="utf-8") as fh: csv_reader = csv.DictReader(fh, dialect="excel", delimiter="\t") for row in csv_reader: print(f'\t{row["name"]} works in the {row["department"]} department')
CSV Write
plain writing
with open('data.tsv', mode='w', encoding='utf-8', newline='\n') as fh: csvwriter = csv.writer(fh, delimiter="\t") csvwriter.writerow( ('Date', 'Confirmed') )
with open(filename + ".tsv", mode="w", encoding="utf-8", newline="\n") as fh: csvwriter = csv.DictWriter( fh, delimiter="\t", extrasaction="ignore", fieldnames=["date", "occupied_percent", "occupied", "total"], ) csvwriter.writeheader() for d in myList: d["occupied_percent"] = round(100 * d["occupied"] / d["total"], 1) csvwriter.writerow(d)
JSON
JSON Read
with open(download_file, encoding="utf-8") as fh: d_json = json.load(fh)
JSON Write
Write dict to file in JSON format, keeping utf-8 encoding
with open("my_file.json", mode="w", encoding="utf-8", newline="\n") as fh: json.dump(my_dict, fh, ensure_ascii=False, sort_keys=False, indent=2)
Excel
Excel Read
import openpyxl workbook = openpyxl.load_workbook( pathToMyExcelFile, data_only=True, # read values instead of formulas read_only=True, # suppresses: UserWarning: wmf image format is not supported so the image is being dropped ) : sheet = workbook["mySheetName"] # or fetch active sheet sheet = workbook.active cell = sheet["A34"] # or cell = sheet.cell(row=34, column=1) # index start here with 1 print(cell.value) # or print(sheet.cell(column=col, row=row).value)
Excel Write
import openpyxl

workbookOut = openpyxl.Workbook()
sheetOut = workbookOut.active
cellOut = sheetOut["A34"]
# or
cellOut = sheetOut.cell(row=i, column=j)  # index start here with 1
cellOut.value = "asdf"
workbookOut.save("out.xlsx")
Image/Picture/Photo Resize and Exif Modifying
from PIL import Image, ImageFilter # pip install Pillow fileIn = "2018-02-09 13.56.25.jpg" # Read image img = Image.open(fileIn)
PROBLEM:
PIL Image.save() drops the IPTC data like tags, keywords, copyright, ...
better using https://imagemagick.org instead when tags shall be kept
Resize
# Resize keeping aspect ratio -> img.thumbnail
# drops exif data, exif can be added from source file via exif= in save, see below
size = 1920, 1920
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
img.thumbnail(size, Image.LANCZOS)
Export file
fileOut = os.path.splitext(fileIn)[0] + "-edit.jpg" try: img = Image.open(fileIn) img.save(fp=fileOut, format="JPEG", quality='keep') # exif=dict_exif_bytes # JPEG Parameters # * qualitiy : 'keep' or 1 (worst) to 95 (best), default = 75. Values above 95 should be avoided. # * dpi : tuple of integers representing the pixel density, (x,y) except IOError: print("cannot write file '%s'" % fileOut)
Export Progressive / web optimized JPEG
from PIL import ImageFile # for MAXBLOCK for progressive export fileOut = os.path.splitext(fileIn)[0] + "-progressive.jpg" try: img.save(fp=fileOut, format="JPEG", quality=80, optimize=True, progressive=True) except IOError: ImageFile.MAXBLOCK = img.size[0] * img.size[1] img.save(fp=fileOut, format="JPEG", quality=80, optimize=True, progressive=True)
JPEG Meta Data: EXIF and IPTC
IPTC: Tags/Keywords
from iptcinfo3 import IPTCInfo # this works in pyhton 3! iptc = IPTCInfo(fileIn) if len(iptc['keywords']) > 0: # or supplementalCategories or contacts print('====> Keywords') for key in sorted(iptc['keywords']): s = key.decode('ascii') # decode binary strings print(s)
EXIF via piexif
import piexif # pip install piexif exif_dict = piexif.load(img.info['exif']) print(exif_dict['GPS'][piexif.GPSIFD.GPSAltitude]) # returns list of 2 integers: value and donator -> v / d # (340000, 1000) => 340m # (51, 2) => 25.5m # Modify altitude exif_dict['GPS'][piexif.GPSIFD.GPSAltitude] = (140, 1) # 140m # write to file exif_bytes = piexif.dump(exif_dict) fileOut = os.path.splitext(fileIn)[0] + "-modExif.jpg" try: img.save(fp=fileOut, format="jpeg", exif=exif_bytes, quality='keep') except IOError: print("cannot write file '%s'" % fileOut)
or
exif_dict = piexif.load(fileIn) for ifd in ("0th", "Exif", "GPS", "1st"): print("===" + ifd) for tag in exif_dict[ifd]: print(piexif.TAGS[ifd][tag]["name"], "\t", tag, "\t", exif_dict[ifd][tag]) print(exif_dict['0th'][306]) # 306 = DateTime
EXIF via exifread
import exifread  # pip install exifread

# Open image file for reading (binary mode); the with-block closes it again
with open(fileIn, "rb") as fh:
    # Return Exif tags
    exif = exifread.process_file(fh)

# for tag in exif.keys():
#     if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'Filename', 'EXIF MakerNote'):
#         print("%s\t%s" % (tag, exif[tag]))
print(exif["Image DateTime"])
print(exif["GPS GPSLatitude"])
print(exif["GPS GPSLongitude"])
EXIF GPS via PIL
# from https://developer.here.com/blog/getting-started-with-geocoding-exif-image-metadata-in-python3
from PIL import Image
from PIL.ExifTags import GPSTAGS, TAGS


def get_exif(filename):
    """Return the raw EXIF data (numeric tag id -> value) of an image file."""
    with Image.open(filename) as image:
        image.verify()
        # read the tags before the image gets closed
        return image._getexif()


def get_labeled_exif(exif):
    """Return the EXIF data with numeric tag ids replaced by their names."""
    return {TAGS.get(key): val for key, val in exif.items()}


def get_geotagging(exif):
    """Return the GPS sub-tags of the EXIF data, keyed by tag name.

    Raises:
        ValueError: if there is no EXIF data or it has no GPSInfo entry.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")

    geotagging = {}
    for idx, tag in TAGS.items():
        if tag == "GPSInfo":
            if idx not in exif:
                raise ValueError("No EXIF geotagging found")

            for key, val in GPSTAGS.items():
                if key in exif[idx]:
                    geotagging[val] = exif[idx][key]
    return geotagging


def _to_float(value):
    """Convert one EXIF rational to float.

    Accepts (numerator, denominator) tuples as well as rational objects
    that support float(), as delivered by newer Pillow versions.
    """
    if isinstance(value, tuple):
        return value[0] / value[1]
    return float(value)


def get_decimal_from_dms(dms, ref):
    """Convert (degrees, minutes, seconds) plus hemisphere ref to decimal degrees.

    The result is negative for ref "S" or "W" and rounded to 5 digits.
    """
    value = _to_float(dms[0]) + _to_float(dms[1]) / 60.0 + _to_float(dms[2]) / 3600.0
    if ref in ["S", "W"]:
        value = -value
    return round(value, 5)


def get_coordinates(geotags):
    """Return (latitude, longitude) in decimal degrees."""
    lat = get_decimal_from_dms(geotags["GPSLatitude"], geotags["GPSLatitudeRef"])
    lon = get_decimal_from_dms(geotags["GPSLongitude"], geotags["GPSLongitudeRef"])
    return (lat, lon)


exif = get_exif(fileIn)
exif_labeled = get_labeled_exif(exif)
print(exif_labeled["DateTime"])
geotags = get_geotagging(exif)
print(get_coordinates(geotags))
Template Matching
see Python - CV2
Optical Character Recognition (OCR)
see Python - OCR
GPX parsing
import gpxpy
import gpxpy.gpx

# Elevation data by NASA: see lib at https://github.com/tkrajina/srtm.py

# the with-block closes the file once it is parsed
with open(gpx_file_path, 'r') as fh_gpx_file:
    gpx = gpxpy.parse(fh_gpx_file)

# Loops for accessing the data
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            ...

for waypoint in gpx.waypoints:
    ...

for route in gpx.routes:
    for point in route.points:
        ...

# interesting properties of point / waypoint objects:
# point.time
# point.latitude
# point.longitude
# point.source
# waypoint.name
diff of 2 files
import difflib

# both files are closed again when the with-block ends
with (
    open(fileIn, "r", encoding="utf-8") as file1,
    open(fileOut, "r", encoding="utf-8") as file2,
):
    diff = difflib.ndiff(file1.readlines(), file2.readlines())
    # keep only the added ("+ ") and removed ("- ") lines
    delta = "".join(line for line in diff if line.startswith(("+ ", "- ")))
print(delta)
Templates/Snippets
Logging
V2: File and STDOUT
# 1. setup
import logging
from logging.handlers import RotatingFileHandler

logfile = "myApp.log"
maxBytes = 20 * 1024 * 1024
backupCount = 5
loglevel_console = logging.INFO
loglevel_file = logging.DEBUG

# create logger
logger = logging.getLogger("root")
logger.setLevel(loglevel_file)

# console handler
ch = logging.StreamHandler()
ch.setLevel(loglevel_console)

# rotating file handler
# fh = logging.FileHandler(logfile)
fh = RotatingFileHandler(logfile, maxBytes=maxBytes, backupCount=backupCount)
fh.setLevel(loglevel_file)

# create formatter and add it to the handlers
# %(name)s = LoggerName, %(threadName)s = ThreadName
formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(name)s - %(threadName)s - %(message)s "
)
fh.setFormatter(formatter)
ch.setFormatter(formatter)

# add the handlers to the logger
logger.addHandler(ch)
logger.addHandler(fh)

logger.debug("DebugMe")
logger.info("Starting")
logger.warning("Attention")
logger.error("Something went wrong")
logger.critical("Something seriously went wrong ")

# 2. in other files/modules now use
import logging

logger = logging.getLogger(__name__)
logger.info("text")

try:
    ...
except Exception:
    # logger.exception() adds the traceback to the log message
    logger.exception("Unhandeled exception")
    # quit() is provided by the site module and can be missing (e.g. in
    # frozen executables); exit with a non-zero code to signal the failure
    raise SystemExit(1)
V1: STDOUT
import logging # Logging is nicer than print, as it can automatically add the threadname logging.basicConfig( level=logging.DEBUG, format="%(asctime)s %(levelname)s %(threadName)s: %(message)s", ) logging.info("Starting")
Hexadecimal
see Python - Hex
Compile to .exe
pip install pyinstaller pyinstaller --onefile --console your.py # for Excel and Matplotlib these options are required --hidden-import=openpyxl --hidden-import=matplotlib --hidden-import pandas.plotting._matplotlib
(Python - py2exe is deprecated)
Progress Bar
see tqdm
from tqdm import tqdm for i in tqdm(range(10000)): ....
CGI Web development
# Print necessary headers. print("Content-Type: text/html") print() # errors and debugging info to browser import cgitb cgitb.enable()
Access URL or Form Parameters
# V2 from https://www.tutorialspoint.com/python/python_cgi_programming.htm import cgi form = cgi.FieldStorage() username = form.getvalue('username') print(username)
# V1
import os
import sys
import urllib.parse

# QUERY_STRING is not set when the script is not run by a web server
query = os.environ.get("QUERY_STRING", "")
# parse_qsl splits on "&" and "=" first and decodes afterwards, so encoded
# "&" / "=" inside a value and parameters without a value do not break it
d = dict(
    urllib.parse.parse_qsl(query, keep_blank_values=True, errors="surrogateescape")
)
print(d)
CGI Backend Returning JSONs
#!/usr/local/bin/python3.6
# -*- coding: utf-8 -*-
import cgi
import json

# Print necessary headers.
print("Content-type: application/json")
print()

# URL/form parameters of the current request
form = cgi.FieldStorage()


def get_form_parameter(para: str) -> str:
    """Return the value of a mandatory request parameter.

    Raises:
        ValueError: if the parameter is missing or empty.
    """
    value = form.getvalue(para)
    # explicit raise instead of assert: asserts are removed when run with -O
    if not value:
        raise ValueError(f"Error: parameter {para} missing")
    return value


response = {}
response['status'] = "ok"

try:
    action = get_form_parameter("action")
    response['action'] = action
    if action == "myAction":
        ...
except Exception as e:
    # report any failure to the client instead of crashing without a body
    response['status'] = "error"
    d = {"type": str(type(e)), "text": str(e)}
    response["exception"] = d
finally:
    print(json.dumps(response))
Databases
PostgreSQL
Basics
import psycopg2
import psycopg2.extras

# connection parameters, passed as keyword arguments to psycopg2.connect()
credentials = {
    "host": "localhost",
    "port": 5432,
    "database": "myDB",
    "user": "myUser",
    "password": "myPwd",
}
connection = psycopg2.connect(**credentials)
cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)

# NOTE(review): each of the two %s placeholders receives one inner list;
# presumably psycopg2 sends a list as SQL array - confirm this is intended
l_bind_vars = [["A1", "A2"], ["B1", "B2"]]
sql = """ SELECT * FROM myTable WHERE 1=1 AND status NOT IN ('CLOSED') AND ColA = %s AND ColB = %s ORDER BY created DESC """
cursor.execute(sql, l_bind_vars)
# NOTE(review): presumably fetchone() returns None when no row matches,
# which would make dict() fail - verify
d_data = dict(cursor.fetchone())
export result to csv file
sql1 = "SELECT * FROM table" sql2 = "COPY (" + sql1 + ") TO STDOUT WITH CSV HEADER DELIMITER '\t'" with open("out.csv", "w") as file: cursor.copy_expert(sql2, file)
ReadConfigFile to connect to PostgreSQL
from [14] database.ini
[postgresql] host=dbhost port=5432 database=dbname user=dbuser password=dbpass
from configparser import ConfigParser


def config(filename="database.ini", section="postgresql"):
    """Read one section of an ini file and return its entries as a dict.

    Args:
        filename: path of the ini file.
        section: name of the section to return.

    Raises:
        Exception: if the file cannot be read or lacks the section.
    """
    parser = ConfigParser()
    # read() skips unreadable files silently and returns the list of parsed
    # files, so an empty result means the file itself is the problem
    if not parser.read(filename):
        raise Exception(f"File {filename} not found or not readable")
    if not parser.has_section(section):
        raise Exception(f"Section {section} not found in the {filename} file")
    return dict(parser.items(section))
main.py
import psycopg2 from config import config def connect(): """Connect to the PostgreSQL database server""" conn = None try: params = config() # read connection parameters print("Connecting to the PostgreSQL database...") conn = psycopg2.connect(**params) cur = conn.cursor() # create a cursor print("PostgreSQL database version:") cur.execute("SELECT version()") # execute a statement db_version = cur.fetchone() print(db_version) cur.close() except (Exception, psycopg2.DatabaseError) as error: print(error) finally: if conn is not None: conn.close() print("Database connection closed.") if __name__ == "__main__": connect()
SQL Lite / SQLite
see page SQLite
Internet Access
Send E-Mails
see Python - eMail
Download file
in Python 3 this is the preferred way of downloading files:
import urllib.request

url = "https://pomber.github.io/covid19/timeseries.json"
# the with-block closes the connection; a timeout prevents hanging forever
with urllib.request.urlopen(url, timeout=3) as filedata:
    datatowrite = filedata.read()
with open("test.json", "wb") as fh:
    fh.write(datatowrite)
Download data using browser UA
import requests headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0", } resp = requests.get(url, headers=headers, timeout=3, # timeout in sec, requests should always have a timeout! # timeout=(1,3), # 1s connect-timout, 3s read-timeout ) if resp.status_code != 200: raise Exception( f"E: bad response. status code:{resp.status_code}, text:\n{resp.text}" )
Download only if cache is too old
def fetch_url_or_cache(fileCache, url) -> str: if check_cache_file_available_and_recent( file_cache=fileCache, max_age=3600, verbose=False ): with open(fileCache, mode="r", encoding="utf-8") as fh: s = fh.read() else: s = fetch(url=url) with open(fileCache, mode="w", encoding="utf-8", newline="\n") as fh: fh.writelines(s) return s def check_cache_file_available_and_recent( file_path: Path | str, max_age: int = 3500, ) -> bool: if isinstance(file_path, str): file_path = Path(file_path) cache_good = False if file_path.exists() and (time.time() - os.path.getmtime(file_path) < max_age): # type: ignore # noqa: E501 cache_good = True return cache_good # or def check_cache_file_available_and_recent( file_cache: str, max_age: int = 3600, verbose: bool = False ) -> bool: b_cache_good = True if not os.path.exists(file_cache): if verbose: print(f"No Cache available: {file_cache}") b_cache_good = False if b_cache_good and time.time() - os.path.getmtime(file_cache) > max_age: if verbose: print(f"Cache too old: {file_cache}") b_cache_good = False return b_cache_good def fetch(url) -> str: headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0", } resp = requests.get(url, headers=headers) if resp.status_code == 200: return resp.content.decode("ascii") else: raise Exception( f"E: bad response. status code:{resp.status_code}, text:\n{resp.text}" )
Download HTML and extract elements
V2: via BeautifulSoup
from bs4 import BeautifulSoup # pip install beautifulsoup4 soup = BeautifulSoup(cont, features='html.parser') myElement = soup.find("div", {"class": "user-formatted-inner"}) myBody = myElement.prettify() # myBody = myElement.encode() # myBody = str(myElement)
V1: via lxml and xpath
from lxml import html
import requests

page = requests.get(url, timeout=3)  # requests should always have a timeout
tree = html.fromstring(page.content)
tbody_trs = tree.xpath("//*/tbody/tr")
l_rows = []
for tr in tbody_trs:
    # rows that do not have exactly 15 cells are skipped
    if len(tr) != 15:
        continue
    l_rows.append([td.text_content() for td in tr])
HTML entities to unicode
import html cont = html.unescape(cont)
Call Rest API
def perform_rest_call_str(url: str) -> str:
    """GET url with a timeout of 3 s and return the response body as text.

    Raises:
        Exception: if the response status code is not 200.
    """
    resp = requests.get(url, timeout=3)
    if resp.status_code == 200:
        return resp.text
    raise Exception(
        f"E: bad response. status code:{resp.status_code}, text:\n{resp.text}"
    )
GUI Interactions
Take Screenshot
import numpy as np
import pyautogui  # (c:\Python\Scripts\)pip install pyautogui

# pyautogui does only support screenshots on monitor #1
...
screenshot = pyautogui.screenshot()
# screenshot = pyautogui.screenshot(region=(screenshotX,screenshotY, screenshotW, screenshotH))
screenshot = np.array(screenshot)
# Convert RGB to BGR
screenshot = screenshot[:, :, ::-1].copy()
Mouse Actions
def clickIt(x, y, key=""):
    """Left-click at (x, y) and move the mouse back to where it was.

    Args:
        x, y: screen position to click.
        key: optional key held down during the click; "" for none.
    """
    x0, y0 = pyautogui.position()
    if key != "":  # ctrl, shift
        pyautogui.keyDown(key)
    try:
        pyautogui.moveTo(x, y, duration=0.2)
        pyautogui.click(x=x, y=y, button='left', clicks=1, interval=0.1)
    finally:
        # release the key even if moving/clicking raised, so it does not stay pressed
        if key != "":  # ctrl, shift
            pyautogui.keyUp(key)
    pyautogui.moveTo(x0, y0)
Web Automation
import glob
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options


class StravaUserMapDL():
    """Controls a Firefox browser and logs in to a web page."""

    def __init__(self):
        self.driver = webdriver.Firefox()

    def login(self):
        """Open the page, dump its HTML to a file and log in if redirected to a login page."""
        driver = self.driver
        url = "https://www.somewebpage.com"
        email = "myemail"
        password = "mypassword"
        filename = "page_source.html"  # file the page source is written to

        driver.get(url)
        title = driver.title
        urlIs = driver.current_url
        cont = driver.page_source  # as string

        # w = overWrite file ; a = append to file
        with open(filename, "w", encoding="utf-8") as fh:
            fh.write(cont)

        # handle login if urlIs != url
        if urlIs != url:
            # the find_element_by_* helpers were removed in Selenium 4.3,
            # find_element(By.<kind>, value) replaces them
            # activate checkbox 'remember_me'
            elem = driver.find_element(By.ID, 'remember_me')
            if not elem.is_selected():
                elem.click()
            assert elem.is_selected()

            elem = driver.find_element(By.ID, 'email')
            elem.send_keys(email)
            elem = driver.find_element(By.ID, 'password')
            elem.send_keys(password)
            elem.send_keys(Keys.RETURN)

            # Wait until login pages is replaced by real page
            urlIs = driver.current_url
            while urlIs != url:
                time.sleep(1)
                urlIs = driver.current_url
                print(urlIs)

        # results = driver.find_elements(By.CLASS_NAME, 'following')
        # results = driver.find_elements(By.TAG_NAME, 'li')
        # print(results[0].text)
        assert urlIs == url
Unit Tests using Web Automation
import os
import time
import unittest

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options


class PythonOrgSearch(unittest.TestCase):
    """Browser test: search python.org for "pycon" in Firefox."""

    # def __init__(self,asdf):
    #     self.driver = webdriver.Firefox()

    def setUp(self):
        print("setUp")
        # headless mode:
        # opts = Options()
        # opts.set_headless()
        # assert opts.headless  # Operating in headless mode
        # self.driver = webdriver.Firefox(options=opts)
        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        driver = self.driver
        driver.get("http://www.python.org")
        self.assertIn("Python", driver.title)
        # find_element_by_name was removed in Selenium 4.3
        elem = driver.find_element(By.NAME, "q")
        elem.send_keys("pycon")
        elem.send_keys(Keys.RETURN)
        assert "No results found." not in driver.page_source
        print("fertig: python_org")

    def tearDown(self):
        print("tearDown")
        print("close Firefox")
        self.driver.close()  # close tab
        self.driver.quit()  # quit browser
        # os._exit(1) # exit unittest without Exception


if __name__ == "__main__":
    try:
        unittest.main()
    except SystemExit as e:
        # unittest.main() always ends with SystemExit; keep its result so a
        # successful run is not reported as a failure
        os._exit(0 if not e.code else 1)
Cryptography and Hashing
Hashing via SHA256
import hashlib


def gen_SHA256_string(s: str) -> str:
    """Return the SHA256 hash of s as hex string.

    The string is hashed in its UTF-8 encoding, so non-ASCII characters are
    supported; pure ASCII input gives the same hash as ASCII encoding.
    """
    m = hashlib.sha256()
    m.update(s.encode("utf-8"))
    return m.hexdigest()
Hashing via MD5
(MD5 is not secure, better use SHA256)
import hashlib


def gen_MD5_string(s: str) -> str:
    """Return the MD5 hash of s as hex string.

    The string is hashed in its UTF-8 encoding, so non-ASCII characters are
    supported; pure ASCII input gives the same hash as ASCII encoding.
    """
    m = hashlib.md5()
    m.update(s.encode("utf-8"))
    return m.hexdigest()
Password hashing via bcrypt
import bcrypt pwd = 'geheim' pwd = pwd.encode("utf-8") # or pwd = b'geheim' hashed = bcrypt.hashpw(pwd, bcrypt.gensalt()) if bcrypt.checkpw(pwd, hashed): print("It Matches!") print(hashed.decode("utf-8"))
To use version 2a instead of 2b (default):
bcrypt.gensalt(prefix=b"2a")
Multiprocessing, subprocesses and Threading
see Python_-_Multithreading as well
use processes for CPU limited work
use threads for I/O limited work
Simple single process
import subprocess process = subprocess.run(["sudo", "du", "--max-depth=1", mydir], capture_output=True, text=True) print (process.stdout)
old, deprecated way:
os.system( "gnuplot " + gpfile)
Multiprocessing
see Python - Multithreading as well
V2 using pool and starmap
import multiprocessing
import os


def worker(i: int, s: str) -> tuple:
    """Return the two inputs together with the id of the executing process."""
    result = (i, s, os.getpid())
    return result


if __name__ == "__main__":
    # gen. pile of work
    l_pile_of_work = [(i, "n" + str(i)) for i in range(1_000)]

    # gen pool of processes
    num_processes = min(multiprocessing.cpu_count(), len(l_pile_of_work))
    # the with-block shuts the pool down when the work is done
    with multiprocessing.Pool(processes=num_processes) as pool:
        # start processes on pile of work
        l_results_unsorted = pool.starmap(
            func=worker, iterable=l_pile_of_work  # each item is a tuple of 2 parameters
        )
        # or if only one parameter:
        # l_results_unsorted = pool.map(doit_de_district, l_pile_of_work)

    l_results = sorted(l_results_unsorted)  # sort by i
V1
import subprocess
import time

l_subprocesses = []  # queue list of subprocesses
max_processes = 4


def process_enqueue(new_process_parameters):
    """Start a subprocess as soon as fewer than max_processes are running."""
    # wait for free slot
    while len(l_subprocesses) >= max_processes:
        process_remove_finished_from_queue()
        time.sleep(0.1)  # sleep 0.1s
    # NOTE: stdout/stderr are only read after the process has ended, so a
    # process writing more than the pipe buffer holds will block until then
    process = subprocess.Popen(
        new_process_parameters,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    l_subprocesses.append(process)


def process_remove_finished_from_queue():
    """Print the output of all finished subprocesses and drop them from the queue."""
    still_running = []
    for process in l_subprocesses:
        # poll() must be called: it returns None while the process is running
        if process.poll() is not None:
            process_print_output(process)
        else:
            still_running.append(process)
    # replace the content in place, so that the list object stays the same
    l_subprocesses[:] = still_running


def process_print_output(process):
    """waits for process to finish and prints process output"""
    stdout, stderr = process.communicate()
    if stdout != "":
        print(f'Out: {stdout}')
    if stderr != "":
        print(f'ERROR: {stderr}')


def process_wait_for_all_finished():
    """Wait for all queued subprocesses, print their output and empty the queue."""
    for process in l_subprocesses:
        process_print_output(process)
    l_subprocesses.clear()  # empty list of done subprocesses


if __name__ == "__main__":
    process_enqueue(l_parameters1)
    ...
    process_enqueue(l_parameters999)
    process_wait_for_all_finished()
Threading
import threading
import queue
import os
import time


def worker(q_work: queue.Queue, results: dict):
    """Process items of q_work until it is empty, storing results[i] per item."""
    while True:
        try:
            # checking empty() and then calling a blocking get() is racy:
            # another thread may take the last item in between
            i, s = q_work.get_nowait()
        except queue.Empty:
            return
        time.sleep(.1)
        result = (i, s, os.getpid())
        results[i] = result
        q_work.task_done()


if __name__ == '__main__':
    d_results = {}  # threads can write into dict

    # gen. pile of work
    l_pile_of_work = [(i, "n" + str(i)) for i in range(1_000)]

    # convert list of work to queue
    q_pile_of_work = queue.Queue(
        maxsize=len(l_pile_of_work))  # maxsize=0 -> unlimited
    for params in l_pile_of_work:
        q_pile_of_work.put(params)

    # gen threads
    num_threads = 100
    l_threads = []  # List of threads, not used here
    for i in range(num_threads):
        t = threading.Thread(name='myThread-' + str(i),
                             target=worker,
                             args=(q_pile_of_work, d_results),
                             daemon=True)
        l_threads.append(t)
        t.start()

    q_pile_of_work.join()  # wait for all threads to complete

    l_results_unsorted = d_results.values()
    l_results = sorted(l_results_unsorted)  # sort by i
asyncio — Asynchronous I/O
see https://docs.python.org/3/library/asyncio-task.html
import asyncio import time # basics # task = asyncio.create_task(coro()) # Wrap the coro coroutine into a Task and schedule its execution. Return the Task object. # sleep # await asyncio.sleep(1) # Running Tasks Concurrently and gathers the return values in list L # L = await asyncio.gather(coro(x1,y1), coro(x2,y2), coro(x3,y3)) async def say_after(delay, what): # Coroutines declared with the async/await syntax await asyncio.sleep(delay) print(what) async def main(): # The asyncio.create_task() function to run coroutines concurrently as asyncio Tasks. task1 = asyncio.create_task( say_after(1, 'hello')) task2 = asyncio.create_task( say_after(1, 'world')) print(f"started at {time.strftime('%X')}") # Wait until both tasks are completed await task1 await task2 print(f"finished at {time.strftime('%X')}") asyncio.run(main())
Pandas
see Pandas
Matplotlib
see Matplotlib
GUI via tkinter
import tkinter as tk  # no need to install via pip


class App(tk.Tk):
    """Small fixed-size window with buttons that trigger automated clicks."""

    def __init__(self):
        super().__init__()
        self.title("robot-CClicker")
        self.geometry("200x200+0+1080")
        self.resizable(width=False, height=False)
        self.l_buttons = []
        self.__create_widgets()

    def __create_widgets(self):
        self.btn_click500 = tk.Button(
            master=self,
            text="500 clicks",
            width=20,
            command=lambda: self.clickBigCookie(500),
        )
        self.l_buttons.append(self.btn_click500)

        for button in self.l_buttons:
            button.pack(anchor=tk.W)

    def clickBigCookie(self, num):
        """Disable all buttons, perform num clicks and enable the buttons again."""
        for button in self.l_buttons:
            button["state"] = tk.DISABLED
        # the window is redrawn only when control returns to the mainloop;
        # force the redraw so the buttons appear disabled during the clicking
        self.update_idletasks()
        try:
            # NOTE(review): helper and self.posBigCockie are not defined in
            # this snippet - presumably provided elsewhere, confirm
            helper.clickIt(self.posBigCockie[0], self.posBigCockie[1], num=num)
        finally:
            # enable the buttons again even if the clicking failed
            for button in self.l_buttons:
                button["state"] = tk.NORMAL


if __name__ == "__main__":
    app = App()
    app.mainloop()
Protobuf
convert .proto file to python class
see https://www.datascienceblog.net/post/programming/essential-protobuf-guide-python/
protoc my_message.proto --python_out ./
read/decode protobuf message
parse message from file
import my_message_pb2  # generated by protoc from my_message.proto

with open("out.bin", "rb") as f:
    my_message = my_message_pb2.my_message()
    my_message.ParseFromString(f.read())
print(my_message)
create/encode protobuf message
import my_message_pb2  # generated by protoc from my_message.proto

my_message = my_message_pb2.my_message()
my_message.data.field1 = 1
my_message.data.field2 = "asdf"
with open("out.bin", "wb") as f:
    # serialize the whole message (not only its data field), so that it can
    # be parsed back as my_message
    f.write(my_message.SerializeToString())
SMTP eMail Sending
#!/usr/bin/env python3 # from https://stackoverflow.com/a/64890 from email.mime.text import MIMEText from email.utils import formatdate, make_msgid # this invokes the secure SMTP protocol (port 465, uses SSL) from smtplib import SMTP_SSL as SMTP def smtp_connect(smtp_server, smtp_username, smtp_password): conn = SMTP(smtp_server) conn.set_debuglevel(False) conn.login(smtp_username, smtp_password) if VERBOSE: print("SMTP connected") return conn def smtp_send(smtp_conn, mail_to, mail_subject, mail_body): # typical values for text_subtype are plain, html, xml mail_text_subtype = "plain" msg = MIMEText(mail_body, mail_text_subtype) msg["Date"] = formatdate() # important to add, since not all smtp servers add missing fields msg['Message-ID'] = make_msgid(domain="entorb.net") # same msg["From"] = mail_from msg["To"] = mail_to msg["Subject"] = mail_subject print(msg.as_string()) smtp_conn.sendmail(mail_from, mail_to, msg.as_string()) smtp_conn.quit()
venv/ virtual environments
create
python -m venv --prompt myProject .venv
activate
# Linux / macOS
source .venv/bin/activate
# Windows
.venv\Scripts\activate.bat
Pydantic data validation
Function input parameter validation
see https://docs.pydantic.dev/usage/validation_decorator/
@validate_arguments def sum(x: int, y: float) -> float: print(x, y) return x + y print(sum(1.1, 1.1)) # -> 2.1
TOML
see https://learnxinyminutes.com/docs/toml/
minimal example: config.toml
# SAP API endpoint [general] sleep_time = 60 use_proxy = true
tool.py
try:
    import tomllib  # comes with python3.11
except ModuleNotFoundError:
    import tomli as tomllib  # pip install tomli

# TOML files must be opened in binary mode
with open("config.toml", "rb") as f:
    o = tomllib.load(f)  # type: ignore

# the keys of the example config.toml are in its [general] table
if o["general"]["use_proxy"]:
    ...
for validation, see https://realpython.com/python-toml/
VCard
import codecs
import re

import vobject  # pip install vobject

with codecs.open(file_in, encoding="utf-8") as fhIn:
    obj = vobject.readComponents(fhIn.read())  # type: ignore
    contacts: list[vobject.base.Component] = list(obj)  # type: ignore

with open("out.vcf", mode="w", encoding="utf-8", newline="\n") as fhOut:
    for card in contacts:
        # cards without birthday are skipped
        if "bday" not in card.contents:
            continue

        # bday: remove 'VALUE': ['DATE']
        card.contents["bday"][0].params = {}  # type: ignore

        # remove all fields but "bday", "n"
        for key in card.contents.copy():  # loop over copy, to allow for deleting keys
            if key not in ("n", "bday"):
                del card.contents[key]

        # recreate fn based on n
        card.add("fn")
        n = card.contents["n"][0]
        fn = f"{n.value.given} {n.value.additional} {n.value.family}"  # type: ignore
        fn = re.sub(r"\s+", " ", fn)  # collapse the gaps left by empty name parts
        card.fn.value = fn  # type: ignore

        fhOut.write(card.serialize())