Python 3 初学者应该掌握的一些提升效率的基本小技巧

# Dictionary views are set-like, so the set operators work on them directly.
aa = {'mike': 'male', 'kathy': 'female', 'steve': 'male', 'hillary': 'female'}
bb = {'mike': 'male', 'ben': 'male', 'hillary': 'female'}
aa.keys() & bb.keys()  # {'mike', 'hillary'} -- keys present in both dictionaries
aa.keys() - bb.keys()  # {'kathy', 'steve'} -- keys that only aa has
# if you want to get the common key-value pairs in the two dictionaries
# (the pairs are compared as whole (key, value) tuples, so values must be hashable)
aa.items() & bb.items()  # {('mike', 'male'), ('hillary', 'female')}

# Group the values of (key, value) pairs by key using a plain dict.
# NOTE: `ls` is expected to be an iterable of 2-item pairs defined by the caller.
dictionary = {}
for k, v in ls:
    # First occurrence of this key: start an empty bucket for it.
    if k not in dictionary:
        dictionary[k] = []
    dictionary[k].append(v)

from collections import defaultdict

# Group values by key; defaultdict creates the empty list on first access,
# so no membership check is needed before appending.
# NOTE: `ls` is expected to be an iterable of 2-item pairs defined by the caller.
dictionary = defaultdict(list)  # missing keys default to a new list
for k, v in ls:
    dictionary[k].append(v)

from itertools import chain

a = {'x': 1, 'y': 2, 'z': 3}
b = {'y': 5, 's': 10, 'x': 3, 'z': 6}
# update a with b: dict() consumes the chained pairs in order, so pairs coming
# from b overwrite pairs with the same key coming from a
c = dict(chain(a.items(), b.items()))
c  # {'x': 3, 'y': 5, 'z': 6, 's': 10}

# Same result without itertools: copy a (leaving it untouched), then
# overwrite/extend the copy with b.
c = a.copy()
c.update(b)

aa = {k: sum(range(k)) for k in range(10)}
aa  # {0: 0, 1: 0, 2: 1, 3: 3, 4: 6, 5: 10, 6: 15, 7: 21, 8: 28, 9: 36}
max(aa.values())  # 36

# Put the value first in each pair so tuples compare by value (key breaks ties).
max(zip(aa.values(), aa.keys()))
# (36, 9) => value, key pair

# reverse=True gives descending order, i.e. largest value first.
sorted(zip(aa.values(), aa.keys()), reverse=True)
# [(36, 9), (28, 8), (21, 7), (15, 6), (10, 5), (6, 4), (3, 3), (1, 2), (0, 1), (0, 0)]

def compute_average_salary(person_salary):
    """Return a person's name together with the mean of their salaries.

    Args:
        person_salary: Sequence whose first item is the person's name and
            whose remaining items are numeric salaries.

    Returns:
        A ``(person, average)`` tuple.

    Raises:
        ValueError: If ``person_salary`` is empty (nothing to unpack).
        ZeroDivisionError: If no salaries follow the name.
    """
    # Extended unpacking: the first item is the name, everything else is salary data.
    person, *salary = person_salary
    # "/" is true division in Python 3, so no float() conversion is required.
    return person, sum(salary) / len(salary)
# Unpack the (name, average) pair returned by compute_average_salary.
person, average_salary = compute_average_salary(["mike", 40000, 50000, 60000])
person  # 'mike'
average_salary  # 50000.0

def compute_average_salary(person_salary_age):
    """Return a person's name, the mean of their salaries, and their age.

    Args:
        person_salary_age: Sequence laid out as ``name, salary..., age``:
            the first item is the name, the last item is the age and
            everything in between is a numeric salary.

    Returns:
        A ``(person, average, age)`` tuple.

    Raises:
        ValueError: If fewer than two items are given (name and age are
            both required by the unpacking).
        ZeroDivisionError: If there are no salaries between name and age.
    """
    # The starred name soaks up the middle; first and last items are peeled off.
    person, *salary, age = person_salary_age
    # "/" is true division in Python 3, so no float() conversion is required.
    return person, sum(salary) / len(salary), age
# Unpack the (name, average, age) triple returned by compute_average_salary.
person, average_salary, age = compute_average_salary(["mike", 40000, 50000, 60000, 42])
age  # 42

# Process every value stored in the dictionary.
for k, v in dictionary.items():
    process(v)
# we are separating head and the rest, and process the values
# as a list similar to the above. head becomes the key value
# NOTE(review): the loop header was missing from this snippet (``rest`` was
# never bound); it is reconstructed here as iterating over ``ls`` -- confirm.
for head, *rest in ls:
    process(rest)
# if not very clear, consider the following example
aa = {k: list(range(k)) for k in range(5)}  # range() is lazy, so list() materializes it
aa  # {0: [], 1: [0], 2: [0, 1], 3: [0, 1, 2], 4: [0, 1, 2, 3]}
for k, v in aa.items():
    # print() so the sums listed below are actually shown when run as a script
    print(sum(v))
# 0
# 0
# 1
# 3
# 6
# The same data as a list of lists: the first item of each row plays the role
# of the key (head) and the remaining items are the values (rest).
aa = [[ii] + list(range(jj)) for ii, jj in enumerate(range(5))]
for head, *rest in aa:
    print(sum(rest))
# 0
# 0
# 1
# 3
# 6

collections用作计数器

collections 是我在 Python 中最喜欢的库之一。如果除了内置的默认数据结构之外，你还需要其他的数据结构，就应该看看这个库。

import re

# Split the text on punctuation/whitespace delimiters (swallowing any
# whitespace that follows a delimiter), then normalize every token to
# stripped lower case.
# NOTE: `python_string` must hold the text to tokenize; it is not defined here.
# NOTE(review): the backslashes of the pattern were lost in this snippet and
# are restored as \s, which matches the sample output below -- confirm.
word_list = [token.lower().strip() for token in re.split(r'[;,:(.\s)]\s*', python_string)]
word_list[:10]  # ['python', 'is', 'a', 'widely', 'used', 'general-purpose', 'high-level', 'programming', 'language', '[17][18][19]']

from collections import defaultdict  # again, collections!

# Count word occurrences: defaultdict(int) starts every unseen word at 0.
# NOTE: `word_list` is expected to be an iterable of words defined by the caller.
dictionary = defaultdict(int)
for word in word_list:
    dictionary[word] += 1

from collections import Counter

# Counter tallies the occurrences of each item of an iterable in one call.
# NOTE: `word_list` is expected to be an iterable of words defined by the caller.
counter = Counter(word_list)
# getting the most common 10 words
counter.most_common(10)
# sample output:
# [('the', 164), ('and', 161), ('a', 138), ('python', 138),
#  ('of', 131), ('is', 102), ('to', 91), ('in', 88), ('', 56)]
# Iterating a Counter yields its keys, just like a dictionary. keys() is a
# view in Python 3 and cannot be sliced, so build a list before slicing.
list(counter)[:10]
# sample output:
# ['', 'limited', 'all', 'code', 'managed', 'multi-paradigm',
#  'exponentiation', 'fromosing', 'dynamic']

# List every attribute available on the counter object.
dir(counter)
# NOTE(review): the sample output below looks like a Python 2 listing; names
# such as __cmp__, has_key, iteritems/iterkeys/itervalues and
# viewitems/viewkeys/viewvalues do not exist on Python 3.
# ['__add__', '__and__', '__class__', '__cmp__', '__contains__', '__delattr__', '__delitem__', '__dict__',
#  '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__',
#  '__init__', '__iter__', '__le__', '__len__', '__lt__', '__missing__', '__module__', '__ne__', '__new__',
#  '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__',
#  '__str__', '__sub__', '__subclasshook__', '__weakref__', 'clear', 'copy', 'elements', 'fromkeys', 'get',
#  'has_key', 'items', 'iteritems', 'iterkeys', 'itervalues', 'keys', 'most_common', 'pop', 'popitem', 'setdefault',
#  'subtract', 'update', 'values', 'viewitems', 'viewkeys', 'viewvalues']

collections 内部以 _chain 这个名字使用 itertools.chain；要把嵌套的 list 展平，应当直接使用 itertools 中的 chain 函数

# chain is provided by itertools; it cannot be imported from collections.
from itertools import chain

ls = [[kk] + list(range(kk)) for kk in range(5)]
# chain.from_iterable flattens exactly one level of nesting and takes the
# outer list directly, so ls does not have to be unpacked into arguments.
flattened_list = list(chain.from_iterable(ls))

# Transform a file line by line using one nested `with` per file; both files
# are closed automatically when their block exits.
# NOTE: input_file_path, output_file_path and process() come from the caller.
with open(input_file_path) as inputfile:
    with open(output_file_path, 'w') as outputfile:
        for line in inputfile:
            outputfile.write(process(line))

# Transform a file line by line; a single `with` statement can manage both
# files, which avoids one level of nesting.
# NOTE: input_file_path, output_file_path and process() come from the caller.
with open(input_file_path) as inputfile, open(output_file_path, 'w') as outputfile:
    for line in inputfile:
        outputfile.write(process(line))

import datetime

# weekday() is 0 for Monday, so subtracting that many days lands on the Monday
# of some_date's week (some_date itself when it already is a Monday).
# NOTE: `some_date` is a date/datetime supplied by the caller.
previous_monday = some_date - datetime.timedelta(days=some_date.weekday())
# similarly, you could map to next monday as well
next_monday = some_date + datetime.timedelta(days=-some_date.weekday(), weeks=1)

from html.parser import HTMLParser


class htmlstrip(HTMLParser):
    """HTML parser that keeps only the text content of what it is fed."""

    def __init__(self):
        # HTMLParser.__init__ sets up required parser state (such as
        # convert_charrefs) and calls reset(); skipping it makes feed() fail.
        super().__init__()
        self.ls = []  # text fragments, in document order

    def handle_data(self, d):
        """Record one run of text found between tags."""
        self.ls.append(d)

    def get_data(self):
        """Return all collected text joined into a single string."""
        return ''.join(self.ls)

    @staticmethod
    def strip(snippet):
        """Return ``snippet`` with its HTML tags removed.

        Args:
            snippet: HTML markup as a string.

        Returns:
            The concatenated text content of ``snippet``.
        """
        html_strip = htmlstrip()
        html_strip.feed(snippet)
        # close() flushes text the parser may still be holding back, e.g.
        # trailing text that ends in an unterminated '&'.
        html_strip.close()
        clean_text = html_strip.get_data()
        return clean_text
# Keep only the text content of html_snippet (markup removed).
# NOTE: `html_snippet` is an HTML string supplied by the caller.
snippet = htmlstrip.strip(html_snippet)

# `from html.parser import ...` does not bind the name `html`, so the module
# has to be imported explicitly before html.escape/html.unescape can be used.
import html

# NOTE: `html_snippet` is an HTML string supplied by the caller.
escaped_snippet = html.escape(html_snippet)
# back to html snippets(this is new in python 3.4)
html_snippet = html.unescape(escaped_snippet)
# and so forth ...

Posted in 未分类