summercodejam2020qualifier/qualifier.py at master · raymondphillips/summercodejam2020qualifier · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Use this file to write your solution for the Summer Code Jam 2020 Qualifier.

Important notes for submission:

- Do not change the names of the two classes included below. The test suite we
  will use to test your submission relies on the existence of these two classes.

- You can leave the `ArticleField` class as-is if you do not wish to tackle the
  advanced requirements.

- Do not include "debug"-code in your submission. This means that you should
  remove all debug prints and other debug statements before you submit your
  solution.
"""
from collections import Counter
import datetime
import typing


class ArticleField:
    """Stub reserved for the Advanced Requirements; it has no behavior yet."""

    def __init__(self, field_type: typing.Type[typing.Any]):
        """Accept ``field_type`` without storing or validating it."""
        return None


class Article:
    """An article with a title, an author, a publication date and content.

    Instances support ``repr()``, ``len()`` (number of characters in the
    content), a truncated introduction and a word-frequency summary.
    """

    # Class-level fallbacks kept for backward compatibility; ``__init__``
    # overrides every one of them on each instance.
    title = ''
    author = ''
    publication_date = ''
    content = ''

    # Characters deleted from each whitespace-separated token before the
    # tokens are counted by ``most_common_words``.
    _NON_WORD_CHARS = '!\',.?":;0123456789@#$%^&*(){}[]\\></|_-+=`~'
    _STRIP_TABLE = str.maketrans('', '', _NON_WORD_CHARS)

    def __init__(self, title: str, author: str,
                 publication_date: datetime.datetime, content: str) -> None:
        """Store the four article fields on the instance unchanged."""
        self.title = title
        self.author = author
        self.publication_date = publication_date
        self.content = content

    def __repr__(self) -> str:
        """Return a one-line summary of the title, author and date.

        The publication date is rendered with ``isoformat()``; the content
        is not included.
        """
        template = (
            '<Article title="{title}" author=\'{author}\' '
            'publication_date=\'{date}\'>'
        )
        return template.format(
            title=self.title,
            author=self.author,
            date=self.publication_date.isoformat(),
        )

    def __len__(self) -> int:
        """Return the number of characters in the content."""
        return len(self.content)

    def short_introduction(self, n_characters: int) -> str:
        """Return the start of the content, at most ``n_characters`` long.

        The first ``n_characters`` characters of the content are cut back to
        the last period (which is kept) or the last space (which is
        dropped), whichever occurs later. If the slice contains neither, it
        is returned unchanged.
        """
        head = self.content[:n_characters]
        period = head.rfind('.')
        space = head.rfind(' ')
        if period == -1 and space == -1:
            # No break point found: slicing with -1 would silently drop the
            # final character, so hand back the slice as-is.
            return head
        if period > space:
            return head[:period + 1]
        return head[:space]

    def most_common_words(self, n_words: int) -> typing.Dict[str, int]:
        """Return the ``n_words`` most frequent words mapped to their counts.

        The content is lower-cased and split on whitespace. Punctuation and
        digits listed in ``_NON_WORD_CHARS`` are then deleted from every
        token. Tokens that end up empty (for example a lone ``-``) are not
        words and are skipped. The result follows the order produced by
        ``Counter.most_common``.
        """
        cleaned = (
            token.translate(self._STRIP_TABLE)
            for token in self.content.lower().split()
        )
        counts = Counter(word for word in cleaned if word)
        return dict(counts.most_common(n_words))