#! /usr/bin/env python3.4
# Created by Cameron Gagnon
# Version: beta
# Contact: [email protected] or
#
# If you have any questions, comments or concerns,
# or improvements to this code,
# feel free to email me and we can go from there.
# I'd love to help anyone use this to make their
# desktop backgrounds always look awesome.
# If you're having trouble using this program,
# please reach out and I can try
# my best to help you out!
#
# This is under the GNU GPL V3 so use it
# and modify it however you want
#
# Inspiration for this program came from:
# http://goo.gl/729Qdg
# http://goo.gl/2MD8tZ
# Check 'em out and give 'em some love
#what to import
import praw
import sqlite3
import pprint
import argparse
import configparser
import json
import os
import subprocess
import re
import sys
import time
import urllib.request
import logging
import logging.handlers
from detools import wallpaper
from PIL import Image
from bs4 import BeautifulSoup
from socket import timeout
from urllib.error import HTTPError,URLError
from requests.exceptions import ConnectionError
from collections import OrderedDict
#sets up global var
USERAGENT = "Reddit wallpaper changer script:v1.0 /u/camerongagnon"
# MANY DEFAULT VALUES ARE DECLARED GLOBAL IN THE PARSE ARGUMENTS
# FUNCTION TO SET UP THE VALUES FOR THE RUN OF THE PROGRAM
# declared as global in functions so we can
# decrement MAXPOSTS when we encounter an img
# that does not meet the width/height requirements,
# since Cycle_wallpaper cycles through the list
# of images from 0 to MAXPOSTS
# make sure to have a file in the same directory with your username
# on the first line, and password on the second
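# For illustration only (the exact filename and location are whatever the
# rest of the program expects, not shown here), that credentials file would
# simply contain two lines:
#   your_reddit_username
#   your_reddit_password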
def main(argList = None):
image_list = []
try:
# preliminary functions
try:
if log:
pass
except NameError:
# likely occurs when log is not
# defined, so we must define it
Config_logging()
args = Parse_cmd_args(argList)
config = Config.config(args)
Database()
sql, cur = Database.connect_to_DB()
log.debug("made database connection")
r = Connected("https://www.reddit.com/.json")
# this is the main function that will download and parse
# the images
Main_photo_controller(r, image_list)
Cycle_wallpaper(image_list)
sql.close()
# this is printed for ease of use when viewing the debug file
log.debug("################################################"
"################################################\n")
Config.writeStatusBar("")
except KeyboardInterrupt:
log.info("CTRL + C entered from command line, exiting...")
Config.writeStatusBar("")
sql.close()
sys.exit(0)
except:
log.debug("Unknown error occured", exc_info = True)
####################################################################
### CLASS IMPLEMENTATIONS
####################################################################
class BaseImg():
def setAsWallpaper(self):
"""
Sets the image as the wallpaper. This is called toward the
end of the program, so image_name and save_location
should already be set.
"""
try:
# call wallpaper program to set the image as the
# wallpaper
print(Config.downloadLoc() + self.image_name)
wallpaper.set_wallpaper(self.save_location)
statusStr = "Wallpaper should be set to: %s " % (self.image_name)
Config.writeStatusBar(statusStr)
log.debug(statusStr)
# sets the last wallpaper to the config file
config = Config.file_found()
if config:
config.set('Last Wallpaper', 'Wallpaper', self.image_name)
with open('settings.conf', 'w') as configfile:
config.write(configfile)
except KeyboardInterrupt:
sys.exit(0)
except:
log.exception("Error setting wallpaper, it is likely the "
"file path is not 100% correct. Make sure "
"there is a foward slash at the end of the "
"path in the SETWALLPAPER variable.", exc_info=True)
sys.exit(1)
class Img(BaseImg):
"""
Creates an img instance for each post found when returning
content from reddit, encapsulates title, id, image name, etc.
into one container to operate on.
"""
def __init__(self, post):
self.setProperties(post)
def setProperties(self, post):
self.setTitle(post.title)
self.setPost(post.permalink)
self.setLink(post.url)
self.setID(post.id)
self.setNSFW(post.over_18)
def setImgName(self, image_name):
self.image_name = image_name
self.setSaveLoc()
def setTitle(self, title):
self.title = title
def setLink(self, link):
self.link = link
def setPost(self, post):
self.post = post
def setID(self, id):
self.id = id
def setNSFW(self, nsfw):
self.nsfw = nsfw
def setSaveLoc(self):
self.save_location = Config.downloadLoc() + str(self.image_name)
def formatImgName(self):
# finds last '/' in url
remove = self.link.rindex('/')
# returns only past the last '/'
self.image_name = self.link[remove + 1:]
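# For illustration: a link of 'https://i.imgur.com/abc123.jpg' (hypothetical)
# would yield image_name 'abc123.jpg'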
########################################################################
class SingleImg(Img):
"""
Downloads the image from the link manually entered by the user
"""
def __init__(self, link):
if link:
self.setLink(link)
flag = self.download(link)
if flag:
self.setAsWallpaper()
sql, cur = Database.connect_to_DB()
cur.execute('INSERT INTO oldposts (ImgName, ImgTitle,\
ImgLink, ImgPost) VALUES (?, ?, ?, ?)',
[self.image_name, self.title, self.link, self.link])
sql.commit()
def download(self, link):
""" Downloads the image to the save location """
try:
self.formatImgName()
self.setSaveLoc()
self.title = "User downloaded: " + self.image_name
except:
return False
# gets the pic download information
try:
picdl = urllib.request.Request(link, headers = {'User-Agent':USERAGENT})
except ValueError:
# probably an invalid url was passed in
return False
try:
picdl = urllib.request.urlopen(picdl, cafile = 'cacert.pem')
except urllib.error.HTTPError:
log.exception("Could not open the specified picture webpage!!\n",
exc_info = True)
Config.writeStatusBar("Error downloading %s" % link)
sys.exit(0)
log.info("Downloading: %s \n\t\t\t\t\t\t as: %s "\
"\n\t\t\t\t\t\t to: %s",
self.link, self.image_name, self.save_location)
Config.writeStatusBar("Downloading %s" % link)
try:
with open(self.save_location, "wb") as picfile:
picfile.write(picdl.read())
return True
except FileNotFoundError:
return False
#######################################################################
class DBImg(BaseImg):
"""
Creates an encapsulation of data about previously downloaded
images by looking it up in the database. This is called to create
the image list for the past pictures page in the GUI
"""
def __init__(self, image_name):
self.setLookUpInfo(image_name)
def setLookUpInfo(self, image_name):
sql, cur = Database.connect_to_DB()
try:
cur.execute('SELECT ImgTitle, ImgLink, ImgPost, Width, Height\
FROM oldposts WHERE ImgName=?',
[image_name])
result = cur.fetchone()
self.title = result[0]
self.link = result[1]
self.post = result[2]
self.width = result[3]
self.height = result[4]
self.image_name = image_name
self.save_location = Config.downloadLoc() + self.image_name
except (sqlite3.OperationalError, TypeError):
log.debug("Error occured in making a DBImg()")
def updateSaveLoc(self):
self.thumb_save_loc_C = Config.downloadLoc() + self.thumb_name_C
self.thumb_save_loc_P = Config.downloadLoc() + self.thumb_name_P
def strip_file_ext(self):
"""
Used to remove the .jpg or other ending from im.image_name
so that we can resave the thumbnail with .png
"""
index = self.image_name.rfind('.')
self.thumb_name = self.image_name[:index]
self.add_P()
self.add_C()
self.add_png()
def add_P(self):
self.thumb_name_P = self.thumb_name + "_P.png"
def add_C(self):
self.thumb_name_C = self.thumb_name + "_C.png"
def add_png(self):
"""
Appends the .png to the end of im.image_name to save the
thumbnail with .png
"""
self.thumb_name = self.thumb_name + ".png"
########################################################################
class PictureList():
"""
Returns information/list of images that have been downloaded
by the program for use in the GUI to display the past images
"""
def list_pics():
sql, cur = Database.connect_to_DB()
image_list = []
try:
cur.execute('SELECT * FROM oldposts')
except sqlite3.OperationalError:
# return an empty list so the caller has
# no objects to iterate over
log.debug("No images in database to select from currently")
return image_list
results = cur.fetchall()
for image in results:
pic = DBImg(image[1]) # image[1] is ImgName
image_list.append(pic)
return image_list
########################################################################
class AboutInfo():
_version = "1.0"
def version():
return AboutInfo._version
########################################################################
class Database():
def __init__(self):
"""
Creates the database if it does not exist already
"""
global cur
global sql
log.info("Accessing database for submission IDs")
sql, cur = Database.connect_to_DB()
# create image database
cur.execute('CREATE TABLE IF NOT EXISTS oldposts(ID TEXT,\
ImgName TEXT, ImgTitle TEXT, ImgLink TEXT,\
ImgPost TEXT, Width INT, Height INT)')
# commit dem changes yo
sql.commit()
# connects to the wallpaper.db which holds the image info
def connect_to_DB():
sql = sqlite3.connect('wallpaper.db')
cur = sql.cursor()
return sql, cur
#REQUIRES id of submission to insert
#MODIFIES database of id's already downloaded
#EFFECTS Inserts the submission id into the database after a
# successful download
def Insert_ImgDB(im):
log.debug("Data to insert\n\t\t\t\t\t\t id: %s"
"\n\t\t\t\t\t\t image_name: %s"
"\n\t\t\t\t\t\t title: %s"
"\n\t\t\t\t\t\t Post: %s"
"\n\t\t\t\t\t\t link: %s",
im.id, im.image_name, im.title, im.post, im.link)
cur.execute('INSERT INTO oldposts (ID, ImgName, ImgTitle,\
ImgPost, ImgLink) VALUES (?, ?, ?, ?, ?)',
[im.id, im.image_name, im.title, im.post, im.link])
sql.commit()
# REQUIRES: valid width/heights to be updated in DB
# MODIFIES: width and height of specified image
# EFFECTS: updates DB with width and height of image
def updateWH(im, width, height):
log.debug("Updating %s with width: %s and height: %s" %\
(im.image_name, width, height))
cur.execute('UPDATE oldposts SET Width=?, Height=? WHERE ImgName=?',
[width, height, im.image_name])
sql.commit()
# REQUIRES: valid name in oldposts
# MODIFIES: oldposts DB table
# EFFECTS: removes the image and its associated data from the
# database.
def del_img(image_name):
try:
sql, cur = Database.connect_to_DB()
log.debug("Deleting %s from database" % image_name)
cur.execute('DELETE FROM oldposts WHERE ImgName = ?', [image_name])
sql.commit()
except:
log.error("ERROR WHILE DELETING DB OBJECT", exc_info = True)
sys.exit(1)
###########################################################################
class Config():
"""
Values used to initiate the settings file.
"""
dir_ = os.path.expanduser("~") + "\\Pictures\\RedditPaper\\"
try:
# tries to create this directory, if it already exists
# then we're good to go
os.makedirs(dir_)
except FileExistsError:
pass
except:
# excepts any other error and creates an image folder
# in the directory where the program was downloaded to
dir_ = os.getcwd() + "\\Downloaded Images\\"
default_values = {'DWNLDLOC': dir_,
'MINWIDTH': 1024,
'MINHEIGHT': 768,
'SUBREDDITS': "futureporn+earthporn+"
"technologyporn+spaceporn+"
"imaginarystarscapes+lavaporn",
'CATEGORY': "hot",
'CYCLETIME' : 0.05,
'MAXPOSTS': 5,
'NSFW': False,
'WALLPAPER': '',
'STATUSBAR': ''
}
def config(args):
"""
Updates/creates the config file with the default/new values
determined by the dict that is passed in
"""
# split up the jumble of time to set the hr and min correctly
args['CYCLEHR'], args['CYCLEMIN']=Config.format_time(args['CYCLETIME'])
# convert NSFW from on/off to True/False
args['NSFW'] = Config.convert_NSFW(args['NSFW'])
config = configparser.ConfigParser()
config['Statusbar'] = OrderedDict([('Statusbar Text',
args['STATUSBAR'])])
config['Save Location'] = OrderedDict([('Directory',
args['DWNLDLOC'])])
config['Options'] = OrderedDict([('Minwidth', args['MINWIDTH']),
('Minheight', args['MINHEIGHT']),
('Subreddits', args['SUBREDDITS']),
('Category', args['CATEGORY']),
('Maxposts', args['MAXPOSTS'])])
config['Cycletime'] = OrderedDict([('Hours', args['CYCLEHR']),
('Minutes', args['CYCLEMIN'])])
config['Adult Content'] = OrderedDict([('NSFW', args['NSFW'])])
# this try/except is used because the cmdline args come through here
# and don't always contain the wallpaper argument, so it is not
# always provided
try:
config['Last Wallpaper'] = OrderedDict([('Wallpaper',
args['WALLPAPER'])])
except KeyError:
configParser = configparser.ConfigParser()
# this is cyclical because we must read the setting in order
# to reset it with the same value, as this file is rewritten
# on each call to this function. This is so we can pass one of
# two dictionaries to it without rewriting similar code.
# ^^CLArgs or DefaultValues
args['WALLPAPER'] = Config.lastImg()
config['Last Wallpaper'] = OrderedDict([('Wallpaper',
args['WALLPAPER'])])
with open('settings.conf', 'w') as configfile:
config.write(configfile)
log.debug("Set config file")
def convert_NSFW(nsfw):
log.debug("nsfw in convert is: %s " % nsfw)
if nsfw:
return True
return False
def format_time(time):
"""
Converts the minutes only time to hours and minutes for
use when updating the config file
"""
hr = float(time//60)
min_ = float(time % 60)
return hr, min_
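# For illustration: format_time(90) returns (1.0, 30.0), i.e. 1 hour 30 minutes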
def file_found():
"""
Returns the parsed config if settings.conf exists, otherwise returns False
"""
config = configparser.ConfigParser()
if config.read("settings.conf") == []:
log.debug("Settings.conf does not exist.")
return False
else:
return config
def read_config():
"""
Reads the values from the config file and also will call config if
a new file needs to be created. With the read in values, it passes
them to parse_cmd_args() to set as default values, since that is
the point of the config file.
"""
global URL
# create default config file if not created
config = Config.file_found()
# if the config file does not exist, create it
# and make sure config var is set to good parser
if not config:
log.debug("Creating configuration file")
Config.config(Config.default_values)
config = configparser.ConfigParser()
config.read('settings.conf')
# get values stored in settings.conf
args = {}
args['SUBREDDITS'] = config.get('Options', 'Subreddits',
fallback = "futureporn+wallpapers+lavaporn+"
"earthporn+imaginarystarscapes+spaceporn")
args['MINWIDTH'] = config.getint('Options', 'Minwidth', fallback = 1024)
args['MINHEIGHT'] = config.getint('Options', 'Minheight',
fallback = 768)
args['MAXPOSTS'] = config.getint('Options', 'Maxposts',
fallback = 5)
args['CYCLETIME'] = config.getfloat('Cycletime', 'Minutes',
fallback = 0.05)
# must convert minutes and hours to only minutes as that's how the
# cycle time works
hours = config.getfloat('Cycletime', 'Hours', fallback = 0)
args['CYCLETIME'] = hours * 60 + args['CYCLETIME']
args['CATEGORY'] = config.get('Options', 'Category', fallback = "hot")
args['NSFW'] = config.getboolean('Adult Content', 'NSFW',
fallback = False)
dir_ = os.getcwd() + "\\Downloaded Images\\"
args['DWNLDLOC'] = config.get('Save Location', 'Directory',
fallback = dir_)
URL = "https://www.reddit.com/r/" + args['SUBREDDITS'] + "/" + \
args['CATEGORY'] + "/"
return args
def minwidth():
"""
returns the value specified by the method name from the
settings.conf file. These methods are mostly used in the
GUI to insert the values stored in settings.conf into the
Entries on the GUI.
"""
config = Config.file_found()
if config:
minwidth = config.getint('Options', 'Minwidth')
return minwidth
# this is so we don't break anything if no value is set
# for this particular value in settings.conf
return ""
def minheight():
config = Config.file_found()
if config:
minheight = config.getint('Options', 'Minheight')
return minheight
return ""
def cycletime():
config = Config.file_found()
if config:
min_ = config.getfloat('Cycletime', 'Minutes')
hr = config.getfloat('Cycletime', 'Hours')
return hr, min_
return "", ""
def downloadLoc():
config = Config.file_found()
if config:
downloadLoc = config.get('Save Location', 'Directory')
return downloadLoc
return ""
def nsfw():
config = Config.file_found()
if config:
nsfw = config.getboolean('Adult Content', 'NSFW')
return nsfw
return False
def subreddits():
config = Config.file_found()
if config:
subreddits = config.get('Options', 'Subreddits')
subreddits = subreddits.replace("+", " ")
return subreddits
return ""
def category():
config = Config.file_found()
if config:
category = config.get('Options', 'Category')
firstLetter = category[0].upper()
category = firstLetter + category[1:]
log.debug("Category is: %s" % category)
return category
return "Hot"
def maxposts():
config = Config.file_found()
if config:
maxposts = config.getint('Options', 'Maxposts')
return maxposts
return ""
def lastImg():
config = Config.file_found()
if config:
lastImg = config.get('Last Wallpaper', 'Wallpaper')
return lastImg
return ""
def statusBar():
config = Config.file_found()
if config:
statusText = config.get('Statusbar', 'Statusbar Text')
return statusText
return ""
def writeStatusBar(statusText):
config = Config.file_found()
if config:
config['Statusbar'] = OrderedDict([('Statusbar Text',
statusText)])
with open('settings.conf', 'w+') as configfile:
config.write(configfile)
####################################################################
### FUNCTION IMPLEMENTATIONS
####################################################################
def Config_logging():
""" Configures the logging to external file """
global log
# set file logger
rootLog = logging.getLogger('')
rootLog.setLevel(logging.DEBUG)
# set format for output to file
formatFile = logging.Formatter(fmt='%(asctime)-s %(levelname)-6s: '\
'%(lineno)d : %(message)s',
datefmt='%m-%d %H:%M')
# add filehandler so once the filesize reaches 5MB a new file is
# created, up to 3 files
fileHandle = logging.handlers.RotatingFileHandler("CrashReport.log",
maxBytes=5000000,
backupCount=3,
encoding = "utf-8")
fileHandle.setFormatter(formatFile)
rootLog.addHandler(fileHandle)
# configures logging to console
# set console logger
console = logging.StreamHandler()
console.setLevel(logging.DEBUG) #toggle console level output with this line
# set format for console logger
consoleFormat = logging.Formatter('%(levelname)-6s %(message)s')
console.setFormatter(consoleFormat)
# add handler to root logger so console && file are written to
logging.getLogger('').addHandler(console)
log = logging.getLogger('reddit-paper')
#####################################################################
#REQUIRES url
#MODIFIES nothing
#EFFECTS returns a praw.Reddit instance if able to connect to the
# specified url; exits if not able to connect or the request times out
def Connected(url):
r = praw.Reddit(user_agent = USERAGENT)
try:
uaurl = urllib.request.Request(url,
headers={'User-Agent' : USERAGENT})
url = urllib.request.urlopen(uaurl,
timeout = 3,
cafile = 'cacert.pem')
content = url.read().decode('utf-8')
json.loads(content)
url.close()
Config.writeStatusBar("Connecting to Reddit...")
# Error that usually occurs when there is no internet connection
except URLError as e:
Config.writeStatusBar("Not connected to the internet.")
log.error("Not connected to the internet. Check "
"your internet connection and try again.")
log.debug("Error is: %s" % e)
sys.exit(0)
except (HTTPError, timeout, AttributeError, ValueError) as e:
Config.writeStatusBar("Not connected to reddit.com. Sign in to"
"internet and try again.")
log.error("You do not appear to be connected to Reddit.com."
" This is likely due to a redirect by the internet connection"
" you are on. Check to make sure no login is required and the"
" connection is stable, and then try again.")
log.debug("Error is: %s" % e)
sys.exit(0)
return r
####################################################################
#REQUIRES link entered by the user
#MODIFIES nothing
#EFFECTS Downloads the image specified by the user and sets it as the wallpaper
def Single_link(link):
if link:
SingleImg(link)
####################################################################
#REQUIRES img_link
#MODIFIES img_link
#EFFECTS Performs operations on url to derive image name and then
# returns the img_name
def General_parser(img_link):
if img_link == []:
return False
try:
remove_index = img_link.rindex('/')
except ValueError:
# occurs when index is not found
log.debug("'/' in img_link is not found", exc_info = True)
return False
image_name = img_link[remove_index + 1:]
# checks for file format ending, and appends .jpg
# if none is found
if image_name.rfind('.') == -1:
image_name = image_name + ".jpg"
log.debug("Image name is: {}".format(image_name))
index = image_name.rfind('.jpg?')
if index != -1:
# strips off '?1020a0747' from some image names
# that have misc. characters after the .jpg
image_name = image_name[:index + 4]
return image_name
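# For illustration (made-up url): General_parser('https://example.com/pics/photo.jpg?1020a0747')
# returns 'photo.jpg'; a link with no file extension gets '.jpg' appended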
####################################################################
#REQUIRES url
#MODIFIES url
#EFFECTS Returns the static download URL of the file, specific
# to Flickr. This SO post helped:
# https://stackoverflow.com/questions/21673323/download-flickr-images-of-specific-url
#
# A list of titles and how to determine size based on ending characters:
# _o (original file) is used here as it is the most reliable,
# although it is sometimes a very large file
# https://www.flickr.com/services/api/misc.urls.html
def Flickr_parse(url):
try:
# gets the page and reads the html into flickr_html
flickr_html = urllib.request.urlopen(url, cafile = 'cacert.pem').read()
# searches for static flickr url within webpage
flickr_html = flickr_html.decode('utf-8')
# at the moment, BeautifulSoup would be too difficult to
# use to parse the html for the link, as the link is not within
# standard html anyway. (It's located in 'Model Export' towards the
# bottom of the page)
img_link = re.findall(r"""
farm # farm is always in static img url
[^":]* # characters to not capture
_[o|k|h|b]\. # _o indicates original img per
# flickr standards
[jpg|png|gif]* # file format is either
# png, jpg, or gif
""", flickr_html, re.VERBOSE)[0]
url = 'https://' + img_link
# some urls come back with escaped backslashes, so remove them
url = url.replace('\\', '')
log.debug("img_link from flickr regex: %s", img_link)
#generates image_name from static url
return General_parser(img_link), url
except KeyboardInterrupt:
sys.exit(0)
# no links/an error occurred while finding links in html of page
except (IndexError,TypeError):
log.debug("Did not find any links in Flickr_parse")
# a UnicodeDecodeError is thrown when the url passed in is already
# a direct file link whose contents cannot be decoded as utf-8;
# in that case we just need to download the img normally anyway
except UnicodeDecodeError:
return General_parser(url), url
except Exception:
log.warning("Exception occured, or image does not fit"
" required size in Flickr_parse",
exc_info = True)
return False, False
####################################################################
#REQUIRES url
#MODIFIES url, image_name
#EFFECTS Returns the image_name and url of the correct link to
# download from. 500px.com sometimes 'protects' the photos
# so they are not as easily programmatically downloaded,
# however the links in the html provide the 'static' download
# link
def Five00px_parse(url):
try:
#refer to Flickr_parse for explanation of this method
px_html = urllib.request.urlopen(url, cafile = 'cacert.pem')
img_html = BeautifulSoup(px_html)
# finds the html with class 'the_photo' and returns the src of that element
img_link = img_html.select('.the_photo')[0].get('src')
url = img_link
return General_parser(img_link), url
except KeyboardInterrupt:
sys.exit(0)
except (IndexError,TypeError):
log.debug("No links found in Five00px_parse")
except UnicodeDecodeError:
return General_parser(url), url
except Exception:
log.warning("Exception occured in Five00px_parse",
exc_info = True)
return False, False
####################################################################
# Very similar to Five00px_parse and Flickr_parse; see those
# for details of how this method works
def Deviant_parse(url, regex):
try:
dev_html = urllib.request.urlopen(url, cafile = 'cacert.pem')
# direct image download link that must begin with
# fc or orig or pre
if regex[:2] == "fc" or regex[:4] == "orig" or\
regex[:3] == "pre" or regex[:3] == "img":
return General_parser(url), url
else:
img_html = BeautifulSoup(dev_html)
# finds all classes with 'dev-content-normal' and finds the src
# attribute of it
img_link = img_html.select('.dev-content-normal')[0].get('src')
url = img_link
return General_parser(url), url
except KeyboardInterrupt:
sys.exit(0)
except (IndexError, TypeError):
log.debug("No links found in Deviant_parse")
return False, False
# this exception is when the good img url to download is
# passed in. Since this url when opened is not html, it throws
# this error, so we know we must find the image title and return
# the url passed in
except UnicodeDecodeError:
return General_parser(url), url
except Exception:
log.warning("Exception occured in Deviant_parse",
exc_info = True)
return False, False
####################################################################
# REQUIRES url for earlycanvas parsing
# MODIFIES the link that gets passed as the download link
# EFFECTS Returns the direct link to download the image
def Early_canvas_parser(url):
html = urllib.request.urlopen(url, cafile = 'cacert.pem')
html = BeautifulSoup(html)
div = html.select('.item-image')[0]
url = div.findChildren()[0].get('src')
return url
####################################################################
# REQUIRES valid imgur url
# MODIFIES image_name
# EFFECTS Retrieves direct image link from imgur.com when posted
# as either an album, or gallery.
# An album example: https://imgur.com/a/dLB0
# A gallery example: https://imgur.com/gallery/fDdj6hw
def Imgur_parse(url, regex):
# check if it's a gif or not from imgur. These don't
# download/display
if (url.rfind(".gif") != -1) or (url.rfind(".gifv") != -1):
log.debug("Image is likely a gif or gifv, not downloading")
return False, False
# then check if it's a direct link
elif regex == "i.imgur.com":
image_name = General_parser(url)
return image_name, url
# check if an imgur.com/gallery link
elif (url.find('/gallery/') != -1):
image_name = General_parser(url)
url = "https://i.imgur.com/" + image_name
return image_name, url
# /a/ means an album in imgur standards
elif (url.find('/a/') != -1):
# have to find new url to download the first image from album
uaurl = urllib.request.Request(url, headers = {'User-Agent': USERAGENT})
imgur_html = urllib.request.urlopen(uaurl, cafile = 'cacert.pem')
soup = BeautifulSoup(imgur_html)
# | class=image w/ child <a> | gets href of this <a> child |
url = soup.select('.image a')[0].get('href')
url = "https:" + url
image_name = General_parser(url)
return image_name, url
# a regular imgur.com domain but no img type in url
elif regex == "imgur.com":
image_name = General_parser(url)
url = "https://i.imgur.com/" + image_name
return image_name, url
# if we get here, there's likely a url format error
else:
log.debug("Something went wrong in Imgur_parse")
return False, False
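# For illustration, assuming imgur keeps this url scheme: a gallery link such
# as https://imgur.com/gallery/fDdj6hw (from the example above) is rewritten by
# the gallery branch to https://i.imgur.com/fDdj6hw.jpg, since General_parser
# appends '.jpg' when the name has no extension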
####################################################################
#REQUIRES url of image to be renamed
#MODIFIES nothing
#EFFECTS Returns the image name and direct download URL of the photo
# being downloaded, derived from the long URL it comes in as
def Title_from_url(im):
try:
# extracts the domain from the url so we know
# which site-specific parser to use for the
# link
regex_result = re.findall(r'^(?:https?:\/\/)?(?:www\.)?([^\/]+)',\
im.link, re.IGNORECASE)
log.debug("Regex (domain) from URL is: %s ", regex_result)
# imgur domain
if regex_result[0] == "imgur.com" or \
regex_result[0] == "i.imgur.com":
# check if we encountered bad data such as a gif or gifv
image_name, url = Imgur_parse(im.link, regex_result[0])
if image_name:
return image_name, url, True
else:
return False, False, False
# staticflickr domain
elif (regex_result[0].find("staticflickr") != -1):
im.formatImgName()
return im.image_name, im.link, True
# flickr domain
elif (regex_result[0].find("flickr") != -1):
image_name, url = Flickr_parse(im.link)
return image_name, url, True
# 500px domain
elif (regex_result[0].find("500px.com") != -1):