像计算机科学家一样学习python(8)

元组+数据结构的选择

Python学习笔记-像计算机科学家一样学习python(8)

元组

Python
def sumall(*num):
    """Add up every positional argument and return the total.

    num: any number of values that can be added to an int
    Returns: the running sum, starting from 0 (0 when called without arguments)
    """
    total = 0
    for value in num:
        total = total + value
    return total


print(sumall(1, 2, 3, 4, 5))

练习

12-1

Python
def histogram(s):
    """Count how often each character occurs in s.

    s: str (any iterable of hashable items works the same way)
    Returns: dict mapping each character to its number of occurrences
    """
    counts = {}
    for ch in s:
        if ch in counts:
            counts[ch] += 1
        else:
            counts[ch] = 1
    return counts

def most_frequent(s):
    """List the letters of s from most frequent to least frequent.

    The letters are ranked by sorting (frequency, letter) pairs in
    descending order, so letters with equal frequency come out in
    descending letter order.

    s: string
    Returns: list of letters
    """
    ranked = sorted(
        ((count, letter) for letter, count in histogram(s).items()),
        reverse=True,
    )
    return [letter for _, letter in ranked]

12-2

这个问题最让人困扰的是:这里的“回文”其实是 anagram(变位词,即由相同字母重新排列而成的单词),和之前的回文(palindrome,正读反读都相同的单词)并不是一个东西。

Python
def signature(s):
    """Return the letters of s sorted and joined back into one string.

    Two words that are anagrams of each other share the same signature.

    s: string
    Returns: string made of the characters of s in sorted order
    """
    return ''.join(sorted(s))


def all_anagrams(filename):
    """Group the words of a word-list file by their letter signature.

    Each line is stripped and lower-cased, then filed under the key returned
    by signature(word), so all anagrams of one another end up in one list.

    filename: path of a text file with one word per line
    Returns: dict mapping a signature string to the list of words that have it
    """
    d = {}
    # The context manager closes the file; the original left the handle
    # opened in the ``for`` header dangling.
    with open(filename) as fin:
        for line in fin:
            word = line.strip().lower()
            d.setdefault(signature(word), []).append(word)
    return d

def print_anagram_sets(d):
    """Print every word list in d that holds more than one word.

    Each printed line shows the size of the list followed by the list.

    d: dict mapping a letter signature to a list of words
    """
    for group in d.values():
        size = len(group)
        if size <= 1:
            continue
        print(size, group)


def print_anagram_sets_in_order(d):
    """Print the multi-word lists of d in increasing order of size.

    Every list with more than one word becomes a (size, list) tuple; the
    tuples are sorted in ascending order and printed one per line.

    d: dict mapping a letter signature to a list of words
    """
    sized_groups = sorted((len(group), group) for group in d.values() if len(group) > 1)
    for entry in sized_groups:
        print(entry)


def filter_length(d, n):
    """Keep only the entries of d whose key is exactly n characters long.

    d: map from word to list of anagrams
    n: integer number of letters
    Returns: new dict with the selected entries; d itself is not modified
    """
    return {key: group for key, group in d.items() if len(key) == n}


if __name__ == '__main__':
    # Group every word of 'words.txt' by its sorted-letter signature.
    anagram_map = all_anagrams('words.txt')
    # Print each group that holds more than one word, smallest groups first.
    print_anagram_sets_in_order(anagram_map)

    # Repeat the report for the groups whose key is exactly eight letters long.
    eight_letters = filter_length(anagram_map, 8)
    print_anagram_sets_in_order(eight_letters)
    

12-3

Python
def signature(s):
    """Build the anagram key of s: its characters in sorted order.

    s: string
    Returns: string containing the same characters as s, sorted
    """
    ordered = sorted(s)
    return ''.join(ordered)


def all_anagrams(filename):
    """Group the words of a word-list file by their letter signature.

    Each line is stripped and lower-cased, then appended to the list stored
    under signature(word), so mutual anagrams share one list.

    filename: path of a text file with one word per line
    Returns: dict mapping a signature string to the list of words that have it
    """
    d = {}
    # Open the file in a ``with`` block so it is closed when reading ends;
    # the original never closed the handle it opened in the loop header.
    with open(filename) as fin:
        for line in fin:
            word = line.strip().lower()
            d.setdefault(signature(word), []).append(word)
    return d

def check(s1, t):
    """Tell whether some word in t differs from s1 at exactly two positions.

    For two anagrams of equal length, differing at exactly two positions
    means one can be turned into the other by swapping two letters.

    s1: str, the word to test
    t: iterable of str, the candidate words to compare s1 against
    Returns: True if at least one candidate qualifies, otherwise False
    """
    for s2 in t:
        # Words of different length cannot be a two-letter swap of each other.
        if len(s2) != len(s1):
            continue
        # Count afresh for every candidate and look at every position,
        # including the last one.
        mismatches = sum(1 for a, b in zip(s1, s2) if a != b)
        if mismatches == 2:
            return True
    return False

def print_anagram_sets_in_order(d):
    """Print, in sorted order, the anagram lists that check() accepts.

    Only lists with more than one word are considered. A list is printed
    once if check(word, list) is true for at least one of its words; the
    original printed the same list again for every such word.

    d: dict mapping a letter signature to a list of words
    """
    groups = sorted(words for words in d.values() if len(words) > 1)
    for group in groups:
        # any() stops at the first accepted word, so each list appears once.
        if any(check(word, group) for word in group):
            print(group)



if __name__ == '__main__':
    # Build the anagram groups from 'words.txt', then print the groups
    # that pass the check() test.
    anagram_map = all_anagrams('words.txt')
    print_anagram_sets_in_order(anagram_map)

12-4

Python
def make_word_dict():
    """Load 'words.txt' into a dict whose keys are the words.

    Every line of the file is stripped and lower-cased and stored as a key
    with the value None. The strings 'a', 'i' and '' are then added, each
    mapped to itself.

    Returns: dict keyed by word
    """
    d = dict()
    # Close the file deterministically instead of leaking the handle.
    with open('words.txt') as fin:
        for line in fin:
            word = line.strip().lower()
            d[word] = None
    for letter in ['a', 'i', '']:
        d[letter] = letter
    return d

# Cache of already-analysed words; the empty string is seeded as reducible.
memo = {'': ['']}


def is_reducible(word, word_dict):
    """Return the list of reducible children of word, caching the result.

    A word is treated as reducible when this list is non-empty; the empty
    string is pre-seeded in memo so it counts as reducible as well.

    word: str
    word_dict: dict with words as keys
    Returns: list of the children of word that are themselves reducible
    """
    if word not in memo:
        # Recurse into each child first; the entry for word is stored only
        # after all of its children have been examined.
        reducible_children = [
            child
            for child in children(word, word_dict)
            if is_reducible(child, word_dict)
        ]
        memo[word] = reducible_children
    return memo[word]

def children(word, word_dict):
    """List the words obtained by deleting one letter from word.

    Each position is removed in turn; a result is kept only if it is found
    in word_dict. The same child may appear more than once.

    word: str
    word_dict: container of known words, queried with ``in``
    Returns: list of children, in order of the deleted position
    """
    shortened = (word[:pos] + word[pos + 1:] for pos in range(len(word)))
    return [candidate for candidate in shortened if candidate in word_dict]

def all_reducible(word_dict):
    """Collect the words of word_dict that are reducible.

    word_dict: dict with words as keys
    Returns: list of the words for which is_reducible returns a non-empty list
    """
    return [word for word in word_dict if is_reducible(word, word_dict) != []]

def print_trail(word):
    """Print word followed by the chain of words it reduces to.

    At every step the first reducible child reported by is_reducible is
    followed, until the empty string is reached. The words are printed on
    one line separated by spaces. Reads the module-level word_dict.

    word: str
    """
    current = word
    while len(current) != 0:
        print(current, end=' ')
        current = is_reducible(current, word_dict)[0]

def print_longest_words(word_dict):
    """Print the reduction trail of the longest reducible word.

    The reducible words are ranked as (length, word) pairs in descending
    order and only the first pair is used; nothing is printed when there
    are no reducible words.

    word_dict: dict with words as keys
    """
    ranked = sorted(
        ((len(candidate), candidate) for candidate in all_reducible(word_dict)),
        reverse=True,
    )
    for _, longest in ranked[:1]:
        print_trail(longest)
        print('\n')

# Load the word list once; print_trail reads this module-level name directly.
word_dict = make_word_dict()
# Show every word for which is_reducible returns a non-empty list ...
print(all_reducible(word_dict))
# ... then the reduction trail of the longest of them.
print_longest_words(word_dict)

案例研究:选择数据结构

13-1

Python
import string

def open_files(filename):
    """Collect the distinct words of a text file.

    Every punctuation character is replaced by a space, the text is
    lower-cased and split on runs of whitespace, so blank lines and doubled
    spaces never produce an empty-string "word".

    filename: path of the text file to read
    Returns: dict in which every word found is mapped to itself
    """
    # One translation table replaces the per-character ``replace`` loop.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    d = dict()
    # The context manager closes the file when reading is finished.
    with open(filename) as fin:
        for sentence in fin:
            # split() without arguments drops empty pieces, unlike split(' ').
            for word in sentence.translate(to_space).lower().split():
                d.setdefault(word, word)
    return d
    

# Print the dictionary of words built from 'The Zen of python.txt'.
print(open_files('The Zen of python.txt'))

13-2

本来想直接调用之前写的histogram函数的,但是因为return的问题,最后还是决定重构了。

Python
import string

def histogram(filename):
    """Build a word-frequency histogram for a text file.

    Every punctuation character is replaced by a space, the text is
    lower-cased and split on runs of whitespace, so blank lines and doubled
    spaces are not counted as an empty-string word.

    filename: path of the text file to read
    Returns: dict mapping each word to the number of times it occurs
    """
    # One translation table replaces the per-character ``replace`` loop.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    d = dict()
    # The context manager closes the file when reading is finished.
    with open(filename) as fin:
        for sentence in fin:
            # split() without arguments drops empty pieces, unlike split(' ').
            for word in sentence.translate(to_space).lower().split():
                d[word] = d.get(word, 0) + 1
    return d

def histogram_sum(d):
    """Add up all the counts stored in a histogram.

    d: dict mapping a word to its count
    Returns: the sum of all values of d (0 for an empty dict)
    """
    total = 0
    for count in d.values():
        total = total + count
    return total


# Print the word histogram of 'The Zen of python.txt' ...
print(histogram('The Zen of python.txt'))
# ... and the sum of all its counts (the file is read a second time here).
print(histogram_sum(histogram('The Zen of python.txt')))

书上的示例:

Python
import string

def process_file(filename):
    """Read a file and return the histogram of its words.

    Every line is handed to process_line, which updates the histogram in
    place.

    filename: str, path of the text file to read
    Returns: dict filled in by process_line
    """
    hist = dict()
    # The context manager closes the file, which the original left open.
    with open(filename) as fp:
        for line in fp:
            process_line(line, hist)
    return hist

def process_line(line, hist):
    """Add the words of one line of text to the histogram hist.

    Hyphens are replaced by spaces before the line is split on whitespace.
    Each piece has leading and trailing punctuation and whitespace removed
    and is lower-cased before being counted.

    line: str, one line of text
    hist: dict mapping word to count; modified in place
    """
    strippable = string.punctuation + string.whitespace
    for token in line.replace('-', ' ').split():
        key = token.strip(strippable).lower()
        hist[key] = hist.get(key, 0) + 1

# Build the word histogram of 'The Zen of python.txt'.
hist = process_file('The Zen of python.txt')

13-3

Python
import string

def histogram(filename):
    """Build a word-frequency histogram for a text file.

    Punctuation characters are turned into spaces, the text is lower-cased
    and split on runs of whitespace, so no empty-string word is counted for
    blank lines or doubled spaces.

    filename: path of the text file to read
    Returns: dict mapping each word to the number of times it occurs
    """
    # A single translation table handles all punctuation in one pass.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    d = dict()
    # Using ``with`` guarantees the file is closed after reading.
    with open(filename) as fin:
        for sentence in fin:
            # split() without arguments drops empty pieces, unlike split(' ').
            for word in sentence.translate(to_space).lower().split():
                d[word] = d.get(word, 0) + 1
    return d


def print_frequently_words(d):
    """Print the 20 most frequent words of a histogram.

    The entries are ranked as (count, word) tuples in descending order and
    the first 20 tuples are printed as one list.

    d: histogram dict mapping word to count
    """
    ranked = sorted(((count, word) for word, count in d.items()), reverse=True)
    print(ranked[:20])

# Print the most frequent words of 'The Zen of python.txt'.
print_frequently_words(histogram('The Zen of python.txt'))

​ 13-4

Python
import string

def histogram(filename):
    """Build a word-frequency histogram for a text file.

    Punctuation characters are turned into spaces, the text is lower-cased
    and split on runs of whitespace, so the empty string never shows up as
    a word for blank lines or doubled spaces.

    filename: path of the text file to read
    Returns: dict mapping each word to the number of times it occurs
    """
    # A single translation table handles all punctuation in one pass.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    d = dict()
    # Using ``with`` guarantees the file is closed after reading.
    with open(filename) as fin:
        for sentence in fin:
            # split() without arguments drops empty pieces, unlike split(' ').
            for word in sentence.translate(to_space).lower().split():
                d[word] = d.get(word, 0) + 1
    return d

def make_word_dict():
    """Load 'words.txt' into a dict whose keys are the words.

    Every line of the file is stripped and lower-cased and stored as a key
    with the value None. The strings 'a', 'i' and '' are then added, each
    mapped to itself.

    Returns: dict keyed by word
    """
    d = dict()
    # Close the file deterministically instead of leaking the handle.
    with open('words.txt') as fin:
        for line in fin:
            word = line.strip().lower()
            d[word] = None
    for letter in ['a', 'i', '']:
        d[letter] = letter
    return d

def check_word(filename):
    """Print the words of a text file that are missing from the word list.

    The words come from histogram(filename) and the word list from
    make_word_dict(); every word not found in the list is printed on its
    own line.

    filename: path of the text file to check
    """
    book_word = histogram(filename)
    wordlist = make_word_dict()
    for candidate in book_word:
        if candidate in wordlist:
            continue
        print(candidate)

# Report the words of 'The Zen of python.txt' that are not in the word list.
check_word('The Zen of python.txt')

random.randint(a, b): Return random integer in range [a, b], including both end points.

choice(seq): Choose a random element from a non-empty sequence.

13-5

Python
import random

def choose_from_hist(d):
    """Pick a random key of d with probability proportional to its count.

    Each key is repeated as many times as its count in a temporary list,
    and random.choice picks one element of that list.

    d: histogram dict mapping a value to an integer count
    Returns: one of the keys of d
    """
    pool = [key for key, count in d.items() for _ in range(count)]
    return random.choice(pool)

13-6

集合类型-set

Python
def process_file(filename, skip_header):
    """Read a file and return the histogram of its words.

    Lines are passed to process_line one by one until a line starting with
    '*** END OF THIS' is met or the file ends.

    filename: str, path of the text file to read
    skip_header: bool, when true skip_gutenberg_header(fp) is called first

    Returns: dict filled in by process_line
    """
    # NOTE(review): skip_gutenberg_header and process_line are not defined in
    # this snippet; presumably the former advances fp past the Project
    # Gutenberg preamble -- confirm where they come from.
    hist = {}
    # The context manager closes the file, which the original left open.
    with open(filename) as fp:
        if skip_header:
            skip_gutenberg_header(fp)

        for line in fp:
            if line.startswith('*** END OF THIS'):
                break

            process_line(line, hist)

    return hist

def subtract(d1, d2):
    """Return the set of keys that are in d1 but not in d2.

    d1, d2: dictionaries
    Returns: set
    """
    only_in_first = set(d1)
    return only_in_first.difference(d2)


def main():
    """Print the words of the book that are missing from the word list.

    Builds the histogram of '158-0.txt' (header skipped) and of 'words.txt',
    then prints every key of the first that is not a key of the second.
    """
    hist = process_file('158-0.txt', skip_header=True)
    words = process_file('words.txt', skip_header=False)

    diff = subtract(hist, words)
    print("The words in the book that aren't in the word list are:")
    for word in diff:
        print(word, end=' ')


# The call used to be indented into main() itself, which made the function
# call itself without end and never run at module level.
if __name__ == '__main__':
    main()

13-7

Python
import random

from bisect import bisect

def process_file(filename, skip_header):
    """Read a file and return the histogram of its words.

    Lines are passed to process_line one by one until a line starting with
    '*** END OF THIS' is met or the file ends.

    filename: str, path of the text file to read
    skip_header: bool, when true skip_gutenberg_header(fp) is called first

    Returns: dict filled in by process_line
    """
    # NOTE(review): skip_gutenberg_header and process_line are not defined in
    # this snippet; presumably the former advances fp past the Project
    # Gutenberg preamble -- confirm where they come from.
    hist = {}
    # Reading inside ``with`` makes sure the file is closed afterwards.
    with open(filename) as fp:
        if skip_header:
            skip_gutenberg_header(fp)

        for line in fp:
            if line.startswith('*** END OF THIS'):
                break

            process_line(line, hist)

    return hist


def random_word(hist):
    """Choose a random word with probability proportional to its frequency.

    A list of cumulative frequencies is built alongside the list of words;
    a random integer below the total is then located in the cumulative list
    with bisect to find the matching word.

    hist: histogram dict mapping word to an integer frequency
    Returns: one of the keys of hist
    """
    candidates = []
    cumulative = []
    running_total = 0

    for candidate, count in hist.items():
        running_total += count
        candidates.append(candidate)
        cumulative.append(running_total)

    # randint includes both end points, hence the "- 1".
    pick = random.randint(0, running_total - 1)
    return candidates[bisect(cumulative, pick)]


def main():
    """Print 100 random words drawn from the book '158-0.txt'.

    The words are chosen by random_word from the histogram of the book and
    printed on one line separated by spaces.
    """
    book_hist = process_file('158-0.txt', skip_header=True)

    print("\n\nHere are some random words from the book")
    remaining = 100
    while remaining > 0:
        print(random_word(book_hist), end=' ')
        remaining -= 1


# Run the demo only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

练习13-8 13-9

我裂开了,以后再说

Working with Jupyter Notebooks in Visual Studio Code

最后编辑于
文章链接: http://pheustal.com/2019/12-17/thinkpython8
本作品采用CC-BY-SA许可。