Python

From Torben's Wiki


Contents

Template

Standard Template

See header documentation example 1 and example 2; see also the Google Python Style Guide.

#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Here comes the docstring containing the description of this piece of software
""" 

# Built-in/Generic Imports
import os
import os.path  # os.path - The key to File I/O
import argparse
import configparser
import logging

# Libs
# import openpyxl

# Author and version info
__author__ = "Dr. Torben Menke"
__email__ = "https://entorb.net"
__maintainer__ = __author__
# __copyright__ = "Copyright 2020, My Project"
# __credits__ = ["John", "Jim", "Jack"]
__license__ = "GPL"
__status__ = "Dev"
__version__ = "0.1"


# Init Logging
# from [1]
# one logger with two handlers: the file receives everything from DEBUG
# upwards, the console only INFO and above
logger = logging.getLogger('myLogger')
logger.setLevel(logging.DEBUG)
# shared message format
# %(name)s = LoggerName, %(threadName)s = ThreadName
logger_formatter = logging.Formatter(
    '%(asctime)s - %(levelname)s - %(message)s')
# file handler
logger_fh = logging.FileHandler('log.log')
logger_fh.setLevel(logging.DEBUG)
logger_fh.setFormatter(logger_formatter)
logger.addHandler(logger_fh)
# console handler
logger_ch = logging.StreamHandler()
logger_ch.setLevel(logging.INFO)
logger_ch.setFormatter(logger_formatter)
logger.addHandler(logger_ch)
# one sample message per severity
logger.debug('DebugMe')
logger.info('Starting')
logger.warning('Attention')
logger.error('Something went wrong')
logger.critical('Something seriously went wrong')


# Read Commandline Parameters
# build the argument parser (-h is added automatically)
arg_parser = argparse.ArgumentParser()
# Boolean Parameter: store_true -> True if the flag is given, else False
arg_parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="increase output verbosity",
)
# parse the arguments and expose them as a plain dict
args = vars(arg_parser.parse_args())
if args["verbose"]:
    # verbose: the console handler shows debug messages as well
    logger_ch.setLevel(logging.DEBUG)


# Read config file
config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')
# print(config.getfloat('Section1', 'Value1'))


# Dummy variables
s = "asdf"
l = ["a", "b", "c"]
d = {'keyx': 'valuex', 'keyy': 'valuey'}
d['keyz'] = 'valuez'


# File access
fileOut = "out/1/out.txt"
(filepath, fileName) = os.path.split(fileOut)
(fileBaseName, fileExtension) = os.path.splitext(fileName)
os.makedirs(filepath, exist_ok=True)

# w = overWrite file ; a = append to file
# the with-statement closes the file even if writing raises an exception
with open(fileOut, mode="w", encoding="utf-8") as fh:
    fh.write(s + "\n")

fileIn = fileOut
with open(fileIn, mode="r", encoding="utf-8") as fh:
    cont_list = fh.read().split("\n")

Object Oriented Template

class myDevice():
    """Base class for a device that has a name and can log messages."""

    def __init__(self, devicename: str = "", verbose: bool = False):
        # name of the device (e.g. for log messages)
        self.devicename = devicename
        self.verbose = verbose   # whether to log information or be quiet

    def log(self, msg: str):
        """Print msg to stdout."""
        print(msg)


class SMU236(myDevice):
    """Device that additionally stores its GPIB address."""

    def __init__(self, gpibaddress: float, devicename: str = "", verbose: bool = False):
        # in a signature the annotation comes before the default: name: type = default
        super().__init__(devicename, verbose)
        self.gpibaddress = gpibaddress


if __name__ == "__main__":
    SMU = SMU236(1234)
    SMU.log("Starting")

method using self.x as default parameter

    def takeScreenshot(self, x=None):
        # a default value cannot refer to self, so None acts as a placeholder
        # that is replaced by the instance value here
        if x is None:
            x = self.windowsGeo['x']

Basics

Installing packages

python -m pip install --upgrade pip
pip install somemodule
or 
pip3 install somemodule
# using a web proxy
# set proxy for windows cmd session
SET HTTPS_PROXY=http://myProxy:8080
(afterwards the --proxy setting below is no longer required)
or
pip install --proxy http://myProxy:8080 somemodule

Naming Conventions

Google Python Style Guide:

module_name, package_name, ClassName, method_name, ExceptionName, function_name, GLOBAL_CONSTANT_NAME, global_var_name, instance_var_name, function_parameter_name, local_var_name

See Category:Python for more stuff

Variables

del var     # delete / undef a variable
var = None  # sets to null
# check if variable is defined (locals() is keyed by the variable NAME, so pass a string)
if "xyz" in locals():
    ...
# for object oriented projects:
if "xyz" in self.__dict__:
    ...

sleep for a while

import time
time.sleep(60)

wait for user input

input("press Enter to close")

Access global variables in functions

myVar = 123
def test():
    """Rebind the module-level variable myVar to 321."""
    global myVar # point to global instead of creation of local var
    myVar = 321

Strings

# num <-> str
s = str (i) # int to string
f = float(s) # str -> float
i = int(s)
str(round(f, 1)) # round first 

modify string

text = text.strip() # trim spaces
text = text.lower() # lower / upper cases

text = input("Enter Text: ") # get string from prompt
print("Good Morning!", end="") # print without linebreak

text = text.replace(x, y) # strings are immutable: replace returns a new string

s.strip() # trim whitespaces from left and right
# replace all (multiple) whitespaces by single space ' '
# join strings
s = ' '.join(s.split())
# do not use s += s1 in a loop, use join instead
# key1=value1&key2=value2  (d is a dict with string keys and string values)
param_str = "&".join("=".join(tup) for tup in d.items())

s * 5 # = s+s+s+s+s

import string
string.capwords(s) # upper case first letter of each word and also removes multiple and trailing spaces

substrings

# find a substring:
x in s
> True / False

if s:  # True if the string is not empty (same as len(s) > 0)
    ...

# handling substrings
a = "abcd"
b = a[:1] + "o" + a[2:] 
> 'aocd'

myString="Hello there !bob@"
i1 = myString.find("!")+1
i2 = myString.find("@")
mySubString=myString[i1:i2]

def substr_between(s: str, s1: str, s2: str) -> str:
    """Return the part of s between the first s1 and the next s2 behind it.

    Raises AssertionError if s1 or s2 is not contained in s, or if there is
    no s2 behind s1.
    """
    assert s1 in s, f"E: can't find '{s1}' in '{s}'"
    assert s2 in s, f"E: can't find '{s2}' in '{s}'"
    i1 = s.find(s1) + len(s1)
    # start the search behind s1, so an earlier occurrence of s2
    # (or s1 == s2, e.g. a pair of quotes) does not break the lookup
    i2 = s.find(s2, i1)
    assert i2 >= i1, f"E: '{s1}' not before '{s2}' in '{s}'"
    return s[i1:i2]

Binary, formatted, raw strings

# Binary Strings
key = b'asdf'
# or 
key = str.encode('asdf')
s = key.decode('ascii')  # decode binary strings
key = s.encode('ascii')  # encode string to binary
# Formatted string
s = f''
# raw string: no escape of \ needed
# a raw string must not end with a single backslash, so append it separately
s = r'c:\Windows' + '\\'

merge variables in string

print("Renner =", i)
print("Renner = %3d" % i)   # right-aligned, padded with spaces to width 3
print("Renner = %03d" % i)  # padded with leading 0's
print(f"Renner = {i}")

# place formatted numbers in a string / sprintf
"The %03i %s cost %f euros" % (3, "beers", 11.50)
# > 'The 003 beers cost 11.500000 euros'

"The length is %.2f meters" % 72.8958
# > 'The length is 72.90 meters'

p = "%.1f%%/min" % percent

Lists

like arrays in Perl

L = [1,2,3,4,5,6]
L = [x for x in range(10)]
L = "Word1 Word2 Word3".split() # split by spaces, like QW in Perl, use split(",") to split on ","
len(L)
L[0:10] # get elements 0-9 (the end index is exclusive)
for a in L:
  ...

ATTENTION: "=" does not create a clone, only a second reference to the same list

M = L       # M and L are the same list object
# clones can be achieved via:
M = L.copy()  # clones L (without the parentheses M would be the method itself)
M = L[:]    # clones L
M = list(L) # clones L
L.append(x) # append a single element
L.extend(M) # put elements of list M to the end of List L
L.insert(i, x) # insert item x at position int i
L.pop() # returns and removes the last item
L.pop(i) # returns and removes the item at position int i
L.reverse()
L = sorted (L, key=str.casefold) # case insensitive / ignore case
L.remove(x) # removes the first occurrence of item x
L.count(x) # how many items x are in the list
L.index(x) # gives the position of the first x in list
s="".join(L)
x in L 
x not in L

# list to string
s = "\n".join(L)

# string to list
L = s.split("\n")

Initiate an "empty" list of certain length: consisting of certain number of None elements:

l = [None] * 10

merge 2 lists to list of tuples

data = list(zip(data_x, data_y))

Cartesian product of lists / tuples

import itertools
for i in itertools.product(*listOfLists):
    print(i)

# remove duplicate values from list
myList = list(dict.fromkeys(myList))


Looping over lists

modify each item in list by adding constant string

l = [s + ';' + v for v in l]

modify item in list

for idx, line in enumerate(cont):
  if "K1001/1" in line:
    line = "K1001/1 Test Nr " + str(i) + "\n"
    cont[idx] = line
    break

remove empty values from end

# "L and" stops the loop once the list is empty instead of raising an IndexError
while L and L[-1] == "":
  # L = L[0:-1]
  L.pop()

modify or even remove certain items

# from https://stackoverflow.com/a/6024599
# iterates in-situ via index and reversing
for i in range(len(somelist) - 1, -1, -1):
    element = somelist[i]
    do_action(element)
    if check(element):
        del somelist[i]

Multi Dim Lists

lAllPoints = []
# append is a method: call it, do not assign to it
lAllPoints.append(['a', 'b', 'c'])
# or using tuple
lAllPoints.append(('a', 'b', 'c'))

sort multidim list

lAllPoints = sorted(lAllPoints, key=lambda x: x[0], reverse=False)

Tuples

Ordered sequence, with no ability to replace or delete items

L = (1,2,3,4,5,6)

list -> tuple

l = tuple(l)

combine 2 tuples

l = la + lb

Dictionaries

like hash in Perl

d = {'keyx': valuex, 'keyy': valuey}
d['keyz'] = valuez
list(d.keys())
# > ['keyx', 'keyy', 'keyz']
del d['keyy']

len(d)
d.clear()
d.copy()
d.keys()
d.values()
d.items() # returns a list of tuples (key, value)
d.get(k) # returns value of key k
d.get(k, x) # returns value of key k; if k is not in d it returns x
d.pop(k) # returns and removes item k
d.pop(k, x) # returns and removes item k; if k is not in d it returns x
x in d
x not in d

# loop over all keys and retrieve there values as well 
for key, value in d.items():
  print (f"{key} = {value}")

# sort keys:
for userid in sorted(d):
    ...
# sort values reversed
for key, value in sorted(d.items(), key=lambda item: item[1], reverse=True):
    ...

MultiDim Dictionaries

dicProductivity = {} 
dicProductivity['Cursor'] = {}
dicProductivity['Cursor']['Nr'] = 1
dicProductivity['Cursor']['Prod'] = 1.909E18
dicProductivity['Cursor']['Cost'] = 0
dicProductivity['Cursor']['Img'] = 'templates/Shop01Cursor.png'
dicProductivity['Grandma'] = {}
dicProductivity['Grandma']['Nr'] = 2
dicProductivity['Grandma']['Prod'] = 1.725E18
dicProductivity['Grandma']['Cost'] = 0
dicProductivity['Grandma']['Img'] = 'templates/Shop02Grandma.png'

for k in dicProductivity.keys() :
   print(k)
   if 'Img' in dicProductivity[k] :
       print("ja")

Alternatively one can use a tuple as key for dictionary:

d = fetchDataAsDict()
myTuple = (d["description"], d["meaning"], d["source"], d["fileName"])
dict_with_tuple_as_key[myTuple] = value

Loops

for / while controls

break    = exit loop
continue = cancel current iteration and go to start of next iteration

ATTENTION: The loops do not create a new variable scope. Only functions and modules introduce a new scope!

for i in range(10):
    print(i)
# i still exists here, because the loop did not create a new scope
del i
i = 0
while i <= 100:
    i += 1
    ...
    if sth:
        break

for i in range(1, 5):
    print(i)
    if sth:
        continue

for f in myList:
    ...

inline if (requires a dummy else):

print("something") if self.verbose else 0

methods/functions

example:
def get_labeled_exif(exif: dict) -> dict:
    """Return a copy of exif whose keys are the TAGS names of the original key IDs."""
    # later entries overwrite earlier ones if two IDs map to the same name
    return {TAGS.get(tag_id): tag_value for tag_id, tag_value in exif.items()}

asserts function argument validation

from the Python course by Carsten Knoll

def eine_funktion(satz, ganzzahl, zahl2, liste):
  """Check the argument types and report the first mismatch.

  Returns -1 / -2 / -3 if satz is no str, ganzzahl is no int, or liste is
  neither tuple nor list; returns None if all checks pass.
  Raises AssertionError if zahl2 is not > 0.
  """
  if not isinstance(satz, str):
    print("Datentpyfehler: satz")
    return -1
  if not isinstance(ganzzahl, int):
    print("Datentpyfehler: ganzzahl")
    return -2
  if not isinstance(liste, (tuple, list)):
    print("Datentpyfehler: liste")
    return -3
  # most compact variant (recommended):
  assert zahl2 > 0, "Error: zahl2 ist nicht > 0" # AssertionError if not fulfilled
def F(x):
  """Return x squared; raise ValueError if x is neither float nor int."""
  if isinstance(x, (float, int)):
    return x**2
  raise ValueError("Zahl erwartet, %s bekommen" % type(x))

better:

def F(x):
  """Return x squared; the assert rejects anything that is not float or int."""
  is_number = isinstance(x, (float, int))
  assert is_number, "Error: x is not of type float or int"
  return x**2
assert variant in ['normal', 'gray', 'cannyedge'], "Error: variant is not in 'normal', 'gray', 'cannyedge'"

Imports

import sys
import datetime
import time
import math
import random
import os.path
# Import my files
import MyFile # without trailing .py
# import a file, not stored in the same folder:
# first append the FOLDER that contains MyFile.py to the module search path
import sys
sys.path.append("../libs")
import MyFile

Math

see Python - Math for linear regression

Python 2: get rid of the annoying integer division: [1]

from __future__ import division

Modulo

15 % 4
--> 3

Random

import random
random.randint(1000000, 9999999)

Date and Time

http://pleac.sourceforge.net/pleac_python/datesandtimes.html

import datetime
d = datetime.date.today().strftime("%y%m%d")
d = datetime.datetime.today().strftime("%y%m%d-%H%M")
# now in UTC
d = datetime.datetime.now(datetime.timezone.utc)
# now in UTC without microseconds
# (datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo keeps
# the "+00:00" offset out of the string so that "Z" can be appended)
d = datetime.datetime.now(datetime.timezone.utc).replace(
    microsecond=0, tzinfo=None).isoformat() + 'Z'

parsing iso dates

from datetime import datetime
dt = datetime.fromisoformat('2017-01-01T12:30:59.000000')
# s is an ISO string ending in "Z"; before Python 3.11 fromisoformat rejects the "Z"
dt = datetime.fromisoformat(s[:-1]) # to remove "Z" from end
dt = datetime.fromisoformat('2020-03-10 06:01:01+00:00')

rounding datetime

from datetime import datetime, timedelta
def floor_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """Round dt down to a multiple of res minutes; seconds and microseconds become 0."""
    surplus = dt.minute % res  # minutes beyond the last multiple of res
    return dt.replace(minute=dt.minute - surplus, second=0, microsecond=0)

def ceil_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """Round dt up to a multiple of res minutes (counted from the full hour).

    A dt that already lies exactly on such a multiple is returned unchanged;
    rounding up may carry over into the next hour or day.
    """
    floored = dt.replace(minute=res * (dt.minute // res),
                         second=0, microsecond=0)
    if floored == dt:
        # already on the grid: ceil(x) == x, do not add a full step
        return dt
    return floored + timedelta(minutes=res)

def round_dt_minutes(dt: datetime, res: int = 5) -> datetime:
    """Round dt to the nearest multiple of res minutes, counted from the full hour."""
    hour_start = dt.replace(minute=0, second=0, microsecond=0)
    # minutes past the hour as a decimal number (microseconds are ignored)
    minutes_decimal = float(dt.minute) + float(dt.second)/60
    steps = round(minutes_decimal / res)
    return hour_start + timedelta(minutes=res * steps)

# demo: apply all three rounding helpers to the same timestamp
dt = datetime.fromisoformat('2020-03-10 06:01:01+00:00')
print(f"original: {dt}")
print(f"floored: {floor_dt_minutes(dt,5)}")
print(f"ceiled: {ceil_dt_minutes(dt,5)}")
print(f"rounded: {round_dt_minutes(dt,5)}")

timing

measure time elapsed

import time
timestart = time.time()
...
print(time.time() - timestart)

calculate time

import time
duration = 1234 # sec
# print is a function in Python 3
print("ETA =", time.ctime(time.time() + duration))
array = time.localtime(time.time() + duration)  # struct_time (year, month, day, ...)

File Access

http://www.penzilla.net/tutorials/python/fileio/

Get filename of python script

from sys import argv
myFilename = argv[0]

Read filename from commandline parameter

import sys
# sys.argv[0] is the script itself, the parameters start at index 1
for filename in sys.argv[1:]:
    ...

Split path into folder, filename, ext

import os
(dirName, fileName) = os.path.split(f)
(fileBaseName, fileExtension)=os.path.splitext(fileName)

Filename without extension

fileOut = os.path.splitext(fileIn)[0] + "-edit.jpg"

Cross platform paths

currentdir = os.curdir
mysubdir = os.path.join(currentdir, "mysubdir")

Run external program

import subprocess
process = subprocess.run(["sudo", "du", "--max-depth=1", mydir], capture_output=True, text=True)
print (process.stdout)

old, deprecated way:

os.system( "gnuplot " + ausgabedatei )

Globbing of file names

import glob
glob.glob('html/[0-9]*.html')

[2] You can use glob:

import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
    print(file)

or simply os.listdir:

import os
for file in os.listdir("/mydir"):
    if file.endswith(".txt"):
        print(os.path.join("/mydir", file))

or if you want to traverse directory, use os.walk:

import os
for root, dirs, files in os.walk("/mydir"):
    for file in files:
        if file.endswith(".txt"):
             print(os.path.join(root, file))

Read file

Check if file / dir exists

import os.path  # os.path - The key to File I/O
os.path.exists("text.txt")
os.path.isfile("text.txt")
os.path.isdir("text")
os.path.isabs("/home/torben/text.txt") # Is it an absolute path
# the reads below are alternatives: pick one, a second read would start
# where the previous one stopped
with open(cache_file, mode='r', encoding='utf-8') as fh:
  cont = fh.read()
  # or
  lines = fh.readlines()  # do not name it "list", that would shadow the builtin
  # or
  line = fh.readline()
  # or
  for line in fh:
     print(line)

fh = open(filename, mode='r', encoding='utf-8')
...
fh.close()

Write to File

fileOut= "out/1/out.txt"
(filepath, fileName) = os.path.split(fileOut)
# (fileBaseName, fileExtension) = os.path.splitext(fileName)
os.makedirs(filepath, exist_ok=True) # = mkdir -p

with open(fileOut, mode='w', encoding='utf-8', newline='\n') as fh:
  # w = overWrite file ; a = append to file
  # If running Python in Windows, "\n" is automatically replaced by "\r\n". To prevent this use newline='\n'
  # lines = list of strings (do not name it "list", that would shadow the builtin)
  fh.writelines(lines) # writelines adds no linebreaks
  # or
  fh.write('\n'.join(lines))
  # or
  for line in lines:
    fh.write(line)

  # Force update of filecontents without closing it
  fh.flush()

# alternative
fh = open(fileOut, mode='w', encoding='utf-8', newline='\n') 
...
fh.close()
 

Touch file

if os.path.exists(fname):
  os.utime(fname, None)
else:
  open(fname, 'w').close()

File Meta Data

Get file size

import os
int (os.path.getsize("moinsen.txt") )

Read Timestamp (last modified)

lasttime = os.path.getmtime(fname)

Copy File

shutil.copyfile(fileTemp,
                os.path.join(dest_path, fileOut))

Delete

Delete file

os.remove(filename)

Delete file if its size = 0

if os.stat(fileOut2).st_size == 0:
    os.remove(fileOut2)

Directories / Folders

make dirs

if not os.path.isdir(d) :
  os.mkdir(d) # normal: the parent folder must already exist
# or recursively = with all parents; exist_ok avoids an error if d already exists
os.makedirs(d, exist_ok=True)

Delete folder+contents

import shutil
shutil.rmtree(d)

Get list of files (not dirs) in directory

# walk into path and fetch all files matching extension jpe?g
files = []
for (dirpath, dirnames, filenames) in os.walk('.'):
    # use forward slashes in the collected paths, also on Windows
    dirpath = dirpath.replace('\\', '/')
    for file in filenames:
        # case-insensitive match on the extension .jpg / .jpeg
        if re.search(r'\.jpe?g$', file, re.IGNORECASE):
            files.append(dirpath+'/'+file)

older simpler version not walking into subfolders

dirname = "/path/to/some/dir"
# list and check the same folder, and return the file names sorted
listoffiles = sorted(
    f for f in os.listdir(dirname)
    if os.path.isfile(os.path.join(dirname, f))
)

same with filter on fileext

# keep plain files only, whose name ends (case-insensitively) with .gpx
listoffiles = [
    name for name in os.listdir(dirSource)
    if os.path.isfile(os.path.join(dirSource, name))
    and name.lower().endswith(".gpx")
]

Templates/Snippets

Commandline Parameters

import argparse
parser = argparse.ArgumentParser()  # construct the argument parser and parse the arguments
# -h comes automatically

# Boolean Parameter
parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")  # store_true -> Boolean Value

# Choice Parameter
# restrict to a list of possible values / choices
# parser.add_argument("--choice", type=int, choices=[0, 1, 2], help="Test choices")

# Positional Parameter (like text.py 123)
# parser.add_argument("num", type=int, help="Number of things")

# Required Parameter
# parser.add_argument("-i", "--input", type=str, required=True, help="Path of input file")

# Optional Parameter
parser.add_argument("-n", "--number", type=int, help="Number of clicks")
# Optional Parameter with Default
parser.add_argument("-s", "--seconds", type=int, default=secDefault, help="Duration of clicking, default = %i (sec)" % secDefault)

args = vars(parser.parse_args())

if args["verbose"]:
    pass # do nothing
   # print ("verbosity turned on") 
if args["number"]:
    print("num=%i" % args["number"])

ReadConfigFile

Config.ini

[Section1]
Cursor         = 205E18
Grandma        =  18E18
Farm           =  11E18
Mine           = 514E18
Factory        = 155E18

test.py

from configparser import ConfigParser
config = ConfigParser()
config.read('Config.ini', encoding='utf-8')
print(config.getfloat('Section1', 'Cursor'))

for sec in config.sections():
    for key in config.options(sec):
        value = config.get(sec, key)


ReadConfigFile to connect to PostgreSQL

from [3] database.ini

[postgresql]
host=dbhost
port=5432
database=dbname
user=dbuser
password=dbpass

config.py

from configparser import ConfigParser
def config(filename='database.ini', section='postgresql'):
   """Read one section of an ini file and return its options as a dict.

   Raises Exception if the section is missing (also if the file is missing).
   """
   parser = ConfigParser()
   parser.read(filename)
   if not parser.has_section(section):
       raise Exception(
           'Section {0} not found in the {1} file'.format(section, filename))
   # parser.items() yields (option, value) pairs
   return {option: value for option, value in parser.items(section)}

main.py

import psycopg2
from config import config 

def connect():
   """ Connect to the PostgreSQL database server

   Reads the connection parameters via config(), prints the result of
   'SELECT version()' and closes the connection again.
   Errors are printed, not raised.
   """
   conn = None
   try:
       params = config() # read connection parameters
       print('Connecting to the PostgreSQL database...')
       conn = psycopg2.connect(**params)
       cur = conn.cursor() # create a cursor
       print('PostgreSQL database version:')
       cur.execute('SELECT version()')  # execute a statement
       db_version = cur.fetchone()
       print(db_version)
       cur.close()
   except (Exception, psycopg2.DatabaseError) as error:
       print(error)
   finally:
       # conn is still None if config() or connect() failed
       if conn is not None:
           conn.close()
           print('Database connection closed.')

if __name__ == '__main__':
   connect()

PostgreSQL: export result to csv file

sql1 = "SELECT * FROM table"
sql2 = "COPY (" + sql1 + ") TO STDOUT WITH CSV HEADER DELIMITER '\t'"
# cursor is an open psycopg2 cursor; the file is closed when the block ends
with open("out.csv", "w") as file:
    cursor.copy_expert(sql2, file)

Checking Operating System

import os
import sys
if os.name == 'posix':
  print ('posix/Unix/Linux')
elif os.name == 'nt':
  print ('windows')
else:
  print ('unknown os')
  sys.exit(1) # throws exception, use quit() to close / die silently

accessing OS environment variables

import os
print(os.getenv('tmp'))

Logging

V2: File and STDOUT

import logging
# from [4]
# logger that passes everything from DEBUG upwards on to its handlers
logger = logging.getLogger('myLogger')
logger.setLevel(logging.DEBUG)
# shared message format; %(name)s = LoggerName, %(threadName)s = ThreadName
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
# file handler which logs even debug messages
fh = logging.FileHandler('mylog.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)
# console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

# one sample message per severity
logger.debug('DebugMe')
logger.info('Starting')
logger.warning('Attention')
logger.error('Something went wrong')
logger.critical('Something seriously went wrong')

V1: STDOUT

import logging
# Logging is nicer than print, as it can automatically add the threadname
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(threadName)s: %(message)s',
                    )
logging.info('Starting')

Image/Picture/Photo Resize and Exif Modifying

from PIL import Image, ImageFilter  # pip install Pillow
import os, sys

fileIn = '2018-02-09 13.56.25.jpg'
# Read image
img = Image.open( fileIn )

Resize

# Resize keeping aspect ratio -> img.thumbnail
# drops exif data, exif can be added from source file via exif= in save, see below
size = 1920, 1920
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
img.thumbnail(size, Image.LANCZOS)

Export file

fileOut = os.path.splitext(fileIn)[0] + "-edit.jpg"
try:
    img = Image.open(fileIn)
    img.save(fp=fileOut, format="JPEG", quality='keep')  # exif=dict_exif_bytes
    # JPEG Parameters
    # * qualitiy : 'keep' or 1 (worst) to 95 (best), default = 75. Values above 95 should be avoided.
    # * dpi : tuple of integers representing the pixel density, (x,y)
except IOError:
    print("cannot write file '%s'" % fileOut)

Export Progressive / web optimized JPEG

from PIL import ImageFile  # for MAXBLOCK for progressive export
fileOut = os.path.splitext(fileIn)[0] + "-progressive.jpg"
try:
    img.save(fp=fileOut, format="JPEG", quality=80, optimize=True, progressive=True)
except IOError:
    ImageFile.MAXBLOCK = img.size[0] * img.size[1]
    img.save(fp=fileOut, format="JPEG", quality=80, optimize=True, progressive=True)

Excel Read and Write

Read
import openpyxl
workbookIn = openpyxl.load_workbook(pathToMyExcelFile, data_only=True)  # data_only : read values instead of formulas
sheetIn = workbookIn['mySheetName']
# or fetch active sheet
sheetIn = workbookIn.active
cellIn = sheetIn['A34']
# or address the cell by numbers via the cell() method
cellIn = sheetIn.cell(row=34, column=1) # index start here with 1
print (cellIn.value)
Write
import openpyxl
workbookOut = openpyxl.Workbook()
sheetOut = workbookOut.active
cellOut = sheetOut['A34']
# or address the cell by numbers via the cell() method
cellOut = sheetOut.cell(row=34, column=1) # index start here with 1
cellOut.value = 'asdf'
workbookOut.save('out.xlsx')

JPEG Meta Data: EXIF and IPTC

IPTC: Tags/Keywords
from iptcinfo3 import IPTCInfo  # this works in pyhton 3!
iptc = IPTCInfo(fileIn)
if len(iptc['keywords']) > 0:  # or supplementalCategories or contacts
    print('====> Keywords')
    for key in sorted(iptc['keywords']):
        s = key.decode('ascii')  # decode binary strings
        print(s)
EXIF via piexif
import piexif  # pip install piexif
exif_dict = piexif.load(img.info['exif'])
print(exif_dict['GPS'][piexif.GPSIFD.GPSAltitude])
# returns a pair of 2 integers: numerator and denominator  -> n / d
# (340000, 1000) => 340m
# (51, 2) => 25.5m

# Modify altitude
exif_dict['GPS'][piexif.GPSIFD.GPSAltitude] = (140, 1)  # 140m

# write to file
exif_bytes = piexif.dump(exif_dict)
fileOut = os.path.splitext(fileIn)[0] + "-modExif.jpg"
try:
    img.save(fp=fileOut, format="jpeg", exif=exif_bytes, quality='keep')
except IOError:
    print("cannot write file '%s'" % fileOut)

or

exif_dict = piexif.load(fileIn)
for ifd in ("0th", "Exif", "GPS", "1st"):
    print("===" + ifd)
    for tag in exif_dict[ifd]:
        print(piexif.TAGS[ifd][tag]["name"], "\t",
              tag, "\t", exif_dict[ifd][tag])
print(exif_dict['0th'][306]) # 306 = DateTime
EXIF via exifread
# Open image file for reading (binary mode)
fh = open(fileIn, 'rb')
# Return Exif tags
exif = exifread.process_file(fh)
fh.close()
# for tag in exif.keys():
#     if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'Filename', 'EXIF MakerNote'):
#         print("%s\t%s" % (tag, exif[tag]))
print(exif['Image DateTime'])
print(exif['GPS GPSLatitude'])
print(exif['GPS GPSLongitude'])
EXIF GPS via PIL
# from https://developer.here.com/blog/getting-started-with-geocoding-exif-image-metadata-in-python3
def get_exif(filename):
    """Open the image file, verify it, close it and return the result of _getexif()."""
    image = Image.open(filename)
    image.verify()
    image.close()
    # NOTE(review): _getexif() is a private Pillow method and is called after
    # close() - confirm that this works with the installed Pillow version
    return image._getexif()


def get_labeled_exif(exif: dict) -> dict:
    """converts the exif key IDs into strings and returns that readable dict"""
    labeled = {}
    for (key, val) in exif.items():
        # TAGS.get returns None for IDs that are not in TAGS
        labeled[TAGS.get(key)] = val
    return labeled

def get_geotagging(exif):
    """Return the GPS entries of exif as a dict keyed by their GPSTAGS names.

    Raises ValueError if exif is empty or contains no GPSInfo entry.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")
    geotagging = {}
    for (tag_id, tag_name) in TAGS.items():
        if tag_name != 'GPSInfo':
            continue
        if tag_id not in exif:
            raise ValueError("No EXIF geotagging found")
        # copy every known GPS key that is present, under its readable name
        for (gps_id, gps_name) in GPSTAGS.items():
            if gps_id in exif[tag_id]:
                geotagging[gps_name] = exif[tag_id][gps_id]
    return geotagging


def get_decimal_from_dms(dms, ref):
    """Convert degrees/minutes/seconds to decimal degrees, rounded to 5 digits.

    dms holds three (numerator, denominator) pairs; ref 'S' or 'W' yields a
    negative result.
    """
    sign = -1 if ref in ['S', 'W'] else 1
    degrees = sign * (dms[0][0] / dms[0][1])
    minutes = sign * (dms[1][0] / dms[1][1] / 60.0)
    seconds = sign * (dms[2][0] / dms[2][1] / 3600.0)
    return round(degrees + minutes + seconds, 5)


def get_coordinates(geotags):
    """Return (latitude, longitude) in decimal degrees from the GPS tags in geotags."""
    latitude = get_decimal_from_dms(
        geotags['GPSLatitude'], geotags['GPSLatitudeRef'])
    longitude = get_decimal_from_dms(
        geotags['GPSLongitude'], geotags['GPSLongitudeRef'])
    return (latitude, longitude)


exif = get_exif(fileIn)
exif_labeled = get_labeled_exif(exif)
print(exif_labeled['DateTime'])

geotags = get_geotagging(exif)
print(get_coordinates(geotags))

Template Matching

see Python - CV2

Optical Character Recognition (OCR)

see Python - OCR

Multi Threading

see Python - Multithreading

Send E-Mails

see Python - eMail

Parse CSV Files

import csv
with open('data/ref_selected_countries.csv', mode='r', encoding='utf-8') as f:
    csv_reader = csv.DictReader(f, dialect='excel', delimiter="\t")
    for row in csv_reader:
        print(f'\t{row["name"]} works in the {row["department"]} department')

Write CSV Files

plain writing

# open with newline='' as the csv module recommends; the writer ends each row
# with lineterminator itself (its default would be "\r\n")
with open('data.tsv', mode='w', encoding='utf-8', newline='') as fh:
    csvwriter = csv.writer(fh, delimiter="\t", lineterminator="\n")
    csvwriter.writerow(
        ('Date', 'Confirmed')
    )

list of dicts writing

# open with newline='' as the csv module recommends; the writer ends each row
# with lineterminator itself (its default would be "\r\n")
with open(filename+'.tsv', mode='w', encoding='utf-8', newline='') as fh:
    csvwriter = csv.DictWriter(fh, delimiter='\t', lineterminator='\n',
                               extrasaction='ignore', fieldnames=[
                                   'date', 'occupied_percent', 'occupied', 'total'])
    csvwriter.writeheader()
    for d in myList:
        # derived column; keys not listed in fieldnames are ignored on write
        d['occupied_percent'] = round(100*d['occupied'] / d['total'], 1)
        csvwriter.writerow(d)

Parse JSON Files

Read JSON

with open(download_file, mode='r', encoding='utf-8') as f:
    d_json = json.load(f)

Write dict to file in JSON format, keeping utf-8 encoding

with open('my_file.json', mode='w', encoding='utf-8', newline='\n') as fh:
    json.dump(my_dict, fh, ensure_ascii=False, sort_keys=True)

Download file

in Python 3 this is the preferred way of downloading files:

import urllib.request
url = "https://pomber.github.io/covid19/timeseries.json"
filedata = urllib.request.urlopen(url)
datatowrite = filedata.read()
with open('test.json', 'wb') as f:
    f.write(datatowrite)

Download HTML and extract elements from table using xpath

from lxml import html
import requests
page = requests.get(url)
tree = html.fromstring(page.content)
tbody_trs = tree.xpath('//*/tbody/tr')
l_rows = []
for tr in tbody_trs:
    # only rows with exactly 15 cells are of interest
    if len(tr) != 15:
        continue
    # append each row once, as a list of its cell texts
    l_rows.append([td.text_content() for td in tr])

Call Rest API

def perform_rest_call_str(url: str) -> str:
    """GET url and return the response text; raise Exception unless the status code is 200."""
    resp = requests.get(url)
    if resp.status_code == 200:
        return resp.text
    raise Exception(
        f'E: bad response. status code:{resp.status_code}, text:\n{resp.text}')

Regular Expressions

see Python - Regular Expressions; see [5] for an online tester

Match email

def checkValidEMail(email: str) -> bool:
    """Return True if email looks like a valid address, else print an error and exit.

    Raises SystemExit for an invalid address.
    """
    # from https://stackoverflow.com/posts/719543/timeline bottom edit
    if not re.fullmatch(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", email):
        print("Error: invalid email")
        # quit() is only injected by the site module (meant for interactive
        # use) and can be missing, e.g. in a frozen .exe; raising SystemExit
        # has the same effect and always works
        raise SystemExit
    return True

Hexadecimal

see Python - Hex

Compile to .exe

pip install pyinstaller
pyinstaller --onefile --console your.py

(Python - py2exe is deprecated)

Cryptography and Hashing

Hashing via SHA256

def gen_SHA256_string(s: str) -> str:
    """Return the SHA256 hash of s as a hex string."""
    m = hashlib.sha256()
    # utf-8 gives the same bytes as ascii for ASCII text, but does not fail
    # on umlauts and other non-ASCII characters
    m.update(s.encode('utf-8'))
    return m.hexdigest()

Hashing via MD5

(MD5 is not secure, better use SHA256)

def gen_MD5_string(s: str) -> str:
    """Return the MD5 hash of s as a hex string (MD5 is not secure)."""
    m = hashlib.md5()
    # utf-8 gives the same bytes as ascii for ASCII text, but does not fail
    # on umlauts and other non-ASCII characters
    m.update(s.encode('utf-8'))
    return m.hexdigest()

Password hashing via bcrypt

import bcrypt
pwd = 'geheim'
pwd = pwd.encode("utf-8")
# or 
pwd = b'geheim'

hashed = bcrypt.hashpw(pwd, bcrypt.gensalt())
if bcrypt.checkpw(pwd, hashed):
    print("It Matches!")
    print(hashed.decode("utf-8"))

To use version 2a instead of 2b (default):

bcrypt.gensalt(prefix=b"2a")

SQL Lite / sqllite

see [6]. SQLite natively supports the following types: NULL, INTEGER, REAL, TEXT, BLOB. Additionally, date works as well.

import sqlite3

# connect to DB
# con = sqlite3.connect('database_file.db')
con = sqlite3.connect(":memory:")
# con.row_factory = sqlite3.Row  # allows for access via row["name"]
cur = con.cursor()
 
# create table (the SQL statement is passed as a string)
cur.execute("""
CREATE TABLE points
   (date date, lat real, lon real, source text, name text)
""")

# ALTER TABLE
cur.execute("ALTER TABLE points ADD date_registered date")

# INSERT
sql = "INSERT INTO points(date) VALUES (?)"
cur.execute(sql, (datetime.date.today(),))  # here the "," is required if only one entry in tuple

sql = "INSERT INTO points(date, lat, lon) VALUES (?,?,?)"
cur.execute(sql, (myDate, myLat, myLon))
con.commit()

# UPDATE
sql = "UPDATE points SET lat = ? WHERE hash = ?"
cur.execute(sql, (myLat, h))
con.commit()

# SELECT 1 row 
sql = "SELECT date FROM points WHERE hash = ? LIMIT 1"
row = cur.execute(sql, (h,)).fetchone()

# SELECT multiple rows
sql = "SELECT date, lat, lon FROM points ORDER BY email"
for row in cur.execute(sql):
    print (row[0])
    print (row["date"]) # if "con.row_factory = sqlite3.Row" is used

cur.close()
con.close()

GPX parsing

import gpxpy
import gpxpy.gpx
# Elevation data by NASA: see lib at https://github.com/tkrajina/srtm.py
# the with-block closes the file as soon as parsing is done
with open(gpx_file_path, 'r') as fh_gpx_file:
    gpx = gpxpy.parse(fh_gpx_file)
#  Loops for accessing the data
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            # interesting properties of point objects
            print(point.time, point.latitude, point.longitude, point.source)
for waypoint in gpx.waypoints:
    # interesting property of waypoint objects
    print(waypoint.name)
for route in gpx.routes:
    for point in route.points:
        print(point.latitude, point.longitude)

Progress Bar

see tqdm

from tqdm import tqdm
# wrapping the iterable in tqdm() shows a progress bar while looping
for i in tqdm(range(10000)):
    ...  # do the work for item i here

CGI Web development

# Print necessary headers.
print("Content-Type: text/html")
# the empty line separates the headers from the page content that follows
print()

# errors and debugging info to browser
import cgitb
cgitb.enable()


Access URL or Form Parameters

# V2 from https://www.tutorialspoint.com/python/python_cgi_programming.htm
import cgi
# FieldStorage gives access to the parameters of the current request
form = cgi.FieldStorage()
# read the parameter named 'username'
username = form.getvalue('username')
print(username)
# V1
import os
import sys
import urllib.parse


def parse_query_string(query: str) -> dict:
    """
    Convert a URL query string like "a=1&b=2" into a dict of str -> str.

    The string is split first and percent-decoded afterwards, so encoded
    "=" or "&" inside a value are kept. Keys without a value map to "",
    "+" is decoded as a space, and for repeated keys the last value wins.
    """
    pairs = urllib.parse.parse_qsl(
        query, keep_blank_values=True, errors="surrogateescape")
    return dict(pairs)


# QUERY_STRING is not set when the script is not run via CGI
query = os.environ.get('QUERY_STRING', '')
d = parse_query_string(query)
print(d)

CGI Backend Returning JSONs

#!/usr/local/bin/python3.6
# -*- coding: utf-8 -*-

import cgi
import json

# Print necessary headers.
print("Content-type: application/json")
print()

# parameters of the current request, read by get_form_parameter()
form = cgi.FieldStorage()


def get_form_parameter(para: str) -> str:
    """
    Return the value of a request parameter that must be present.

    Raises:
        ValueError: if the parameter is missing or empty. An explicit raise
            is used because assert statements are skipped under "python -O".
    """
    value = form.getvalue(para)
    if not value:
        raise ValueError(f"Error: parameter {para} missing")
    return value


response = {}
response['status'] = "ok"

try:
    action = get_form_parameter("action")
    response['action'] = action
    if action == "myAction":
        ...

except Exception as e:
    # report any failure to the client as part of the JSON response
    response['status'] = "error"
    d = {"type": str(type(e)), "text": str(e)}
    response["exception"] = d

finally:
    # the response is always sent, in the success and in the error case
    print(json.dumps(response))

Exceptions

Catch keyboard interrupt and do a "safe exit"

i = 0  # the counter has to exist before the loop increments it
try:
    # the with-block closes the file on every way out of the loop
    with open("out.txt", "w") as FILE:
        while True:
            i += 1
            print(i)
except KeyboardInterrupt:
    # Ctrl+C ends the loop; the file has been closed by the with-block
    pass

Catch all exceptions

try:
    [...]  # placeholder for the code that might fail
except Exception as e:
    # Exception does not cover KeyboardInterrupt and SystemExit
    print("Exception raised: ", e)

Custom Exceptions

try:
    # raise an exception with a custom message
    raise Exception('HiHo')
except Exception as e:
    # str(e) is the message passed to the constructor
    print("Exception raised: ", e)

Math: curve fitting

from[7]

import numpy as np
# curve-fit() function imported from scipy
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt

# Model used for the fit; a and b are the coefficients to be determined
def fit_function(x, a, b):
    """Exponential model: return a * exp(b * x)."""
    exponential = np.exp(b * x)
    return a * exponential

# initial guess of the parameters: a = last data value, b = 0.14
p0 = [data_y[-1], 0.14]
# both parameters are left unbounded
param, param_cov = curve_fit(
    fit_function,
    data_x,
    data_y,
    p0,
    bounds=((-np.inf, -np.inf), (np.inf, np.inf)),
)

print(f"Coefficients:\n{param}")
print(f"Covariance of coefficients:\n{param_cov}")

# evaluate the fitted model at every x value of the data
data_y_fit = [fit_function(x, param[0], param[1]) for x in data_x]

# plot the data as red dots and the fit as a dashed blue line
plt.plot(data_x, data_y, 'o', color='red', label="data")
plt.plot(data_x, data_y_fit, '--', color='blue', label="fit")
plt.legend()
plt.show()

GUI Interactions

Take Screenshot

import pyautogui # (c:\Python\Scripts\)pip install pyautogui
# pyautogui does only support screenshots on monitor #1
...
# capture the screen
screenshot = pyautogui.screenshot()
# alternative: capture only a region given as (x, y, width, height)
# screenshot = pyautogui.screenshot(region=(screenshotX,screenshotY, screenshotW, screenshotH))
# convert the screenshot into a numpy array (np = numpy, imported elsewhere)
screenshot = np.array(screenshot) 
# Convert RGB to BGR 
# by reversing the last axis; copy() creates a new array instead of a view
screenshot = screenshot[:, :, ::-1].copy()

Mouse Actions

def clickIt(x, y, key=""):
    """
    Left-click at screen position (x, y) and move the mouse back afterwards.

    Args:
        x, y: position to click at.
        key: optional key (e.g. ctrl, shift) that is held down while
            clicking; "" means no key.
    """
    # remember where the mouse is, in order to restore the position later
    x0, y0 = pyautogui.position()
    if key != "":  # ctrl, shift
        pyautogui.keyDown(key)
    try:
        pyautogui.moveTo(x, y, duration=0.2)
        pyautogui.click(x=x, y=y, button='left', clicks=1, interval=0.1)
    finally:
        # release the key even if moving or clicking failed,
        # otherwise it would stay pressed
        if key != "":  # ctrl, shift
            pyautogui.keyUp(key)
    pyautogui.moveTo(x0, y0)

Web Automation

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

import os
import time
import glob

class StravaUserMapDL():
    """Example of browser automation via Selenium and Firefox."""

    def __init__(self):
        # starts a new Firefox window
        self.driver = webdriver.Firefox()

    def login(self, filename="page_source.html"):
        """
        Open the start page and log in if the site redirects to a login page.

        Args:
            filename: file the page source of the first response is
                written to (overwritten if it exists).
        """
        # find_element(By.ID, ...) replaces find_element_by_id(), which has
        # been removed from Selenium 4
        from selenium.webdriver.common.by import By

        driver = self.driver
        url = "https://www.somewebpage.com"
        email = "myemail"
        password = "mypassword"
        driver.get(url)

        title = driver.title
        urlIs = driver.current_url
        cont = driver.page_source  # as string
        # w = overWrite file ; a = append to file
        # utf-8 is set explicitly, since web pages often contain characters
        # the platform default encoding cannot represent
        with open(filename, "w", encoding="utf-8") as FILE:
            FILE.write(cont)

        # handle login if urlIs != url
        if urlIs != url:
            # activate checkbox 'remember_me'
            elem = driver.find_element(By.ID, 'remember_me')
            if not elem.is_selected():
                elem.click()
            assert elem.is_selected()
            elem = driver.find_element(By.ID, 'email')
            elem.send_keys(email)
            elem = driver.find_element(By.ID, 'password')
            elem.send_keys(password)
            elem.send_keys(Keys.RETURN)
            # Wait until login pages is replaced by real page
            urlIs = driver.current_url
            while urlIs != url:
                time.sleep(1)
                urlIs = driver.current_url
            print(urlIs)

            # results = driver.find_elements(By.CLASS_NAME, 'following')
            # results = driver.find_elements(By.TAG_NAME, 'li')

            # print(results[0].text)
        assert urlIs == url

Unit Tests using Web Automation

import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

#from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

import os
import time

class PythonOrgSearch(unittest.TestCase):
    """Example unit test that drives Firefox via Selenium."""
#    def __init__(self,asdf):
#        self.driver = webdriver.Firefox()

    def setUp(self):
        """Start a fresh browser before each test."""
        print("setUp")
        # headless mode:
        # opts = Options()
        # opts.set_headless()
        # assert opts.headless  # Operating in headless mode
        # self.driver = webdriver.Firefox(options=opts)

        self.driver = webdriver.Firefox()

    def test_search_in_python_org(self):
        """Search python.org for "pycon" and expect at least one result."""
        # find_element(By.NAME, ...) replaces find_element_by_name(), which
        # has been removed from Selenium 4
        from selenium.webdriver.common.by import By

        driver = self.driver
        driver.get("http://www.python.org")
        self.assertIn("Python", driver.title)
        elem = driver.find_element(By.NAME, "q")
        elem.send_keys("pycon")
        elem.send_keys(Keys.RETURN)
        # assertNotIn is not skipped under "python -O", unlike a bare assert
        self.assertNotIn("No results found.", driver.page_source)
        print("fertig: python_org")

    def tearDown(self):
        """Close the browser after each test."""
        print("tearDown")
        print("close Firefox")
        self.driver.close()  # close tab
        self.driver.quit()  # quit browser
        # os._exit(1) # exit unittest without Exception


if __name__ == "__main__":
    try:
        unittest.main()
    except SystemExit as e:
        # unittest.main() ends via SystemExit; pass its status on instead of
        # always reporting a failure (status 1) to the calling shell
        status = e.code
        if status is None:
            status = 0
        elif not isinstance(status, int):
            status = 1
        # os._exit() does not flush buffered output, so flush it first
        import sys
        sys.stdout.flush()
        sys.stderr.flush()
        os._exit(int(status))

Debugging

Print name of the current function, useful to place at every function

import sys
...
# place inside a method; self.verbose switches the output on
if self.verbose:
  # sys._getframe() is the frame of the running function, f_code.co_name its name
  print("=== " + sys._getframe().f_code.co_name + " ===")
pip install ipython
...
# requires the ipython package
from IPython import embed  
...
embed()  # to drop into iPython Shell from within the code

Or use the editor PyCharm or Visual Studio Code to set breakpoints to drop into python debugger

Profiling: time and number of calls per function

Var 1: via external calling

python -m cProfile myScript.py > 1.txt

Var 2: loading the module in the .py script

import cProfile
# create a profiler and start collecting data
pr = cProfile.Profile()
pr.enable()
# placeholder: put the code to be profiled here
<... do some work ...>
# stop collecting and print the collected statistics
pr.disable()
pr.print_stats()