diff --git a/.travis.yml b/.travis.yml index dd52ed0d..abf64bf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: python matrix: include: - - python: 3.6.7 + - python: 3.7.2 dist: xenial sudo: true diff --git a/Pipfile b/Pipfile index 7941c829..15f3e288 100644 --- a/Pipfile +++ b/Pipfile @@ -5,22 +5,20 @@ name = "pypi" [packages] appdirs = "*" -requests = "*" -six = "*" requests_oauthlib = "*" # pin a beta version which fixes vunerability: # https://nvd.nist.gov/vuln/detail/CVE-2017-18342 # todo won't accept >= for some reason so will need changing in future pyyaml = "==4.2b1" -# pyyaml = "*" -piexif = "*" -python-magic = "*" +exif = "*" +selenium = "*" gphotos-sync = {path = "."} -coverage = "*" [dev-packages] mock = "*" pytest = "*" +coverage = "*" +gphotos-sync = {path = "."} [requires] -python_version = "3.6" +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index 378aecf7..d32cb88c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "14b1ab4bd14871d3fc40324fe0f642b3fc3d9f71172b117d366582da284c7e6c" + "sha256": "8fb7d97784e1d6eea05811123d2dc6166e2e93ddc5c30cafa41c86f7f8fa3ee4" }, "pipfile-spec": 6, "requires": { - "python_version": "3.6" + "python_version": "3.7" }, "sources": [ { @@ -26,10 +26,10 @@ }, "certifi": { "hashes": [ - "sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7", - "sha256:993f830721089fef441cdfeb4b2c8c9df86f0c63239f06bd025a76a7daddb033" + "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", + "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" ], - "version": "==2018.11.29" + "version": "==2019.3.9" }, "chardet": { "hashes": [ @@ -38,42 +38,23 @@ ], "version": "==3.0.4" }, - "coverage": { + "enum34": { + "hashes": [ + "sha256:2d81cbbe0e73112bdfe6ef8576f2238f2ba27dd0d55752a776c41d38b7da2850", + "sha256:644837f692e5f550741432dd3f223bbb9852018674981b1664e5dc339387588a", + "sha256:6bd0f6ad48ec2aa117d3d141940d484deccda84d4fcd884f5c3d93c23ecd8c79", + "sha256:8ad8c4783bf61ded74527bffb48ed9b54166685e4230386a9ed9b1279e2df5b1" + ], + "version": "==1.1.6" + }, + "exif": { "hashes": [ - "sha256:09e47c529ff77bf042ecfe858fb55c3e3eb97aac2c87f0349ab5a7efd6b3939f", - "sha256:0a1f9b0eb3aa15c990c328535655847b3420231af299386cfe5efc98f9c250fe", - "sha256:0cc941b37b8c2ececfed341444a456912e740ecf515d560de58b9a76562d966d", - "sha256:10e8af18d1315de936d67775d3a814cc81d0747a1a0312d84e27ae5610e313b0", - "sha256:1b4276550b86caa60606bd3572b52769860a81a70754a54acc8ba789ce74d607", - "sha256:1e8a2627c48266c7b813975335cfdea58c706fe36f607c97d9392e61502dc79d", - "sha256:2b224052bfd801beb7478b03e8a66f3f25ea56ea488922e98903914ac9ac930b", - "sha256:447c450a093766744ab53bf1e7063ec82866f27bcb4f4c907da25ad293bba7e3", - "sha256:46101fc20c6f6568561cdd15a54018bb42980954b79aa46da8ae6f008066a30e", - "sha256:4710dc676bb4b779c4361b54eb308bc84d64a2fa3d78e5f7228921eccce5d815", - "sha256:510986f9a280cd05189b42eee2b69fecdf5bf9651d4cd315ea21d24a964a3c36", - "sha256:5535dda5739257effef56e49a1c51c71f1d37a6e5607bb25a5eee507c59580d1", - "sha256:5a7524042014642b39b1fcae85fb37556c200e64ec90824ae9ecf7b667ccfc14", - "sha256:5f55028169ef85e1fa8e4b8b1b91c0b3b0fa3297c4fb22990d46ff01d22c2d6c", - "sha256:6694d5573e7790a0e8d3d177d7a416ca5f5c150742ee703f3c18df76260de794", - "sha256:6831e1ac20ac52634da606b658b0b2712d26984999c9d93f0c6e59fe62ca741b", - "sha256:77f0d9fa5e10d03aa4528436e33423bfa3718b86c646615f04616294c935f840", - 
"sha256:828ad813c7cdc2e71dcf141912c685bfe4b548c0e6d9540db6418b807c345ddd", - "sha256:85a06c61598b14b015d4df233d249cd5abfa61084ef5b9f64a48e997fd829a82", - "sha256:8cb4febad0f0b26c6f62e1628f2053954ad2c555d67660f28dfb1b0496711952", - "sha256:a5c58664b23b248b16b96253880b2868fb34358911400a7ba39d7f6399935389", - "sha256:aaa0f296e503cda4bc07566f592cd7a28779d433f3a23c48082af425d6d5a78f", - "sha256:ab235d9fe64833f12d1334d29b558aacedfbca2356dfb9691f2d0d38a8a7bfb4", - "sha256:b3b0c8f660fae65eac74fbf003f3103769b90012ae7a460863010539bb7a80da", - "sha256:bab8e6d510d2ea0f1d14f12642e3f35cefa47a9b2e4c7cea1852b52bc9c49647", - "sha256:c45297bbdbc8bb79b02cf41417d63352b70bcb76f1bbb1ee7d47b3e89e42f95d", - "sha256:d19bca47c8a01b92640c614a9147b081a1974f69168ecd494687c827109e8f42", - "sha256:d64b4340a0c488a9e79b66ec9f9d77d02b99b772c8b8afd46c1294c1d39ca478", - "sha256:da969da069a82bbb5300b59161d8d7c8d423bc4ccd3b410a9b4d8932aeefc14b", - "sha256:ed02c7539705696ecb7dc9d476d861f3904a8d2b7e894bd418994920935d36bb", - "sha256:ee5b8abc35b549012e03a7b1e86c09491457dba6c94112a2482b18589cc2bdb9" + "sha256:ad276268e191f5d153c4350dbacae5534cfbd846ab3636d1012c724e8e37faad", + "sha256:b377d837df7f700d4cfc4e421d9b0d70695b84683ac078b3d1c2c14fff089e2c", + "sha256:e624665a3c7f7a98f24d0379e6566204b7047b3afa1349824ec998ed55cef1c7" ], "index": "pypi", - "version": "==4.5.2" + "version": "==0.3.1" }, "gphotos-sync": { "path": "." @@ -92,22 +73,6 @@ ], "version": "==3.0.1" }, - "piexif": { - "hashes": [ - "sha256:22e40356e4b4d5069787caa010ae77779c7cf8fc875ba43103ded2a2fc7b0c5f", - "sha256:60cd973f7b5a6d999c3bb60b784745c8d5345486ef62708ffdc5cc4a78e44636" - ], - "index": "pypi", - "version": "==1.1.2" - }, - "python-magic": { - "hashes": [ - "sha256:f2674dcfad52ae6c49d4803fa027809540b130db1dec928cfbb9240316831375", - "sha256:f3765c0f582d2dfc72c15f3b5a82aecfae9498bd29ca840d72f37d7bd38bfcd5" - ], - "index": "pypi", - "version": "==0.4.15" - }, "pyyaml": { "hashes": [ "sha256:ef3a0d5a5e950747f4a39ed7b204e036b37f9bddc7551c1a813b8727515a832e" @@ -120,7 +85,6 @@ "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e", "sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b" ], - "index": "pypi", "version": "==2.21.0" }, "requests-oauthlib": { @@ -131,13 +95,13 @@ "index": "pypi", "version": "==1.2.0" }, - "six": { + "selenium": { "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + "sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c", + "sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d" ], "index": "pypi", - "version": "==1.12.0" + "version": "==3.141.0" }, "urllib3": { "hashes": [ @@ -162,6 +126,46 @@ ], "version": "==19.1.0" }, + "coverage": { + "hashes": [ + "sha256:3684fabf6b87a369017756b551cef29e505cb155ddb892a7a29277b978da88b9", + "sha256:39e088da9b284f1bd17c750ac672103779f7954ce6125fd4382134ac8d152d74", + "sha256:3c205bc11cc4fcc57b761c2da73b9b72a59f8d5ca89979afb0c1c6f9e53c7390", + "sha256:465ce53a8c0f3a7950dfb836438442f833cf6663d407f37d8c52fe7b6e56d7e8", + "sha256:48020e343fc40f72a442c8a1334284620f81295256a6b6ca6d8aa1350c763bbe", + "sha256:5296fc86ab612ec12394565c500b412a43b328b3907c0d14358950d06fd83baf", + "sha256:5f61bed2f7d9b6a9ab935150a6b23d7f84b8055524e7be7715b6513f3328138e", + "sha256:68a43a9f9f83693ce0414d17e019daee7ab3f7113a70c79a3dd4c2f704e4d741", + "sha256:6b8033d47fe22506856fe450470ccb1d8ba1ffb8463494a15cfc96392a288c09", + 
"sha256:7ad7536066b28863e5835e8cfeaa794b7fe352d99a8cded9f43d1161be8e9fbd", + "sha256:7bacb89ccf4bedb30b277e96e4cc68cd1369ca6841bde7b005191b54d3dd1034", + "sha256:839dc7c36501254e14331bcb98b27002aa415e4af7ea039d9009409b9d2d5420", + "sha256:8f9a95b66969cdea53ec992ecea5406c5bd99c9221f539bca1e8406b200ae98c", + "sha256:932c03d2d565f75961ba1d3cec41ddde00e162c5b46d03f7423edcb807734eab", + "sha256:988529edadc49039d205e0aa6ce049c5ccda4acb2d6c3c5c550c17e8c02c05ba", + "sha256:998d7e73548fe395eeb294495a04d38942edb66d1fa61eb70418871bc621227e", + "sha256:9de60893fb447d1e797f6bf08fdf0dbcda0c1e34c1b06c92bd3a363c0ea8c609", + "sha256:9e80d45d0c7fcee54e22771db7f1b0b126fb4a6c0a2e5afa72f66827207ff2f2", + "sha256:a545a3dfe5082dc8e8c3eb7f8a2cf4f2870902ff1860bd99b6198cfd1f9d1f49", + "sha256:a5d8f29e5ec661143621a8f4de51adfb300d7a476224156a39a392254f70687b", + "sha256:aca06bfba4759bbdb09bf52ebb15ae20268ee1f6747417837926fae990ebc41d", + "sha256:bb23b7a6fd666e551a3094ab896a57809e010059540ad20acbeec03a154224ce", + "sha256:bfd1d0ae7e292105f29d7deaa9d8f2916ed8553ab9d5f39ec65bcf5deadff3f9", + "sha256:c62ca0a38958f541a73cf86acdab020c2091631c137bd359c4f5bddde7b75fd4", + "sha256:c709d8bda72cf4cd348ccec2a4881f2c5848fd72903c185f363d361b2737f773", + "sha256:c968a6aa7e0b56ecbd28531ddf439c2ec103610d3e2bf3b75b813304f8cb7723", + "sha256:df785d8cb80539d0b55fd47183264b7002077859028dfe3070cf6359bf8b2d9c", + "sha256:f406628ca51e0ae90ae76ea8398677a921b36f0bd71aab2099dfed08abd0322f", + "sha256:f46087bbd95ebae244a0eda01a618aff11ec7a069b15a3ef8f6b520db523dcf1", + "sha256:f8019c5279eb32360ca03e9fac40a12667715546eed5c5eb59eb381f2f501260", + "sha256:fc5f4d209733750afd2714e9109816a29500718b32dd9a5db01c0cb3a019b96a" + ], + "index": "pypi", + "version": "==4.5.3" + }, + "gphotos-sync": { + "path": "." + }, "mock": { "hashes": [ "sha256:5ce3c71c5545b472da17b72268978914d0252980348636840bd34a00b5cc96c1", @@ -201,18 +205,17 @@ }, "pytest": { "hashes": [ - "sha256:067a1d4bf827ffdd56ad21bd46674703fce77c5957f6c1eef731f6146bfcef1c", - "sha256:9687049d53695ad45cf5fdc7bbd51f0c49f1ea3ecfc4b7f3fde7501b541f17f4" + "sha256:592eaa2c33fae68c7d75aacf042efc9f77b27c08a6224a4f59beab8d9a420523", + "sha256:ad3ad5c450284819ecde191a654c09b0ec72257a2c711b9633d677c71c9850c4" ], "index": "pypi", - "version": "==4.3.0" + "version": "==4.3.1" }, "six": { "hashes": [ "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" ], - "index": "pypi", "version": "==1.12.0" } } diff --git a/README.rst b/README.rst index a72ed167..c2b36c43 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ After doing a full sync you will have 2 directories off of the specified root: of the file (this matches the approach used in the official Google tool for Windows). * **albums** - contains a folder hierarchy representing the set of albums and shared albums in your library. All - the files are symlinks to content in one of the other folders. The folder names will be + the files are symlinks to content in the photos folder. The folder names will be 'albums/YYYY/MM Original Album Name'. In addition there will be further folders when using the --compare-folder option. The option is used to make a @@ -46,8 +46,8 @@ NOTES: extract metadata from video files and revert to relying on Google Photos meta data and file modified date (this is a much less reliable way to match video files, but the results should be OK if the backup folder was originally created using gphotos-sync). 
-* If the library contains two separate items that have the same exif UID then this will result in seeing one - pair of duplicates, plus one of those duplicates will appear in the extra_files list. +* If you have shared albums and have clicked 'add to library' on items from others' libraries, then you will have two + copies of those items and they will show as duplicates too. Known Issues ------------ diff --git a/gphotos/BadIds.py b/gphotos/BadIds.py index 88d814f8..8cf2e13a 100644 --- a/gphotos/BadIds.py +++ b/gphotos/BadIds.py @@ -1,17 +1,11 @@ from pathlib import Path from yaml import safe_load, safe_dump, YAMLError -from typing import NamedTuple, Dict +from typing import Dict import logging log = logging.getLogger(__name__) -# sadly, I cant use this with safe_load / safe_dump -class Item(NamedTuple): - path: str - product_url: str - - class BadIds: """ keeps a list of media items with ID in a YAML file. The YAML file allows a user to easily investigate their list of media items that have diff --git a/gphotos/BaseMedia.py b/gphotos/BaseMedia.py index 823771db..9518891a 100644 --- a/gphotos/BaseMedia.py +++ b/gphotos/BaseMedia.py @@ -119,3 +119,4 @@ def mime_type(self) -> str: @property def url(self) -> str: raise NotImplementedError + diff --git a/gphotos/DatabaseMedia.py b/gphotos/DatabaseMedia.py index 2d90f98a..4bc6c9c5 100644 --- a/gphotos/DatabaseMedia.py +++ b/gphotos/DatabaseMedia.py @@ -46,7 +46,8 @@ def __init__(self, _description: str = None, _date: datetime = None, _create_date: datetime = None, - _downloaded: bool = False): + _downloaded: bool = False, + _location: str = None): super(DatabaseMedia, self).__init__() # add all of the arguments as attributes on this object self.__dict__.update(locals()) @@ -59,8 +60,15 @@ def update_extra_meta(self, uid, create_date, size): self._create_date = create_date self._size = size + @property + def location(self) -> str: + """ + image GPS information + """ + return self._location + # ----- BaseMedia base class override Properties below ----- - @ property + @property def size(self) -> int: return self._size diff --git a/gphotos/GoogleAlbumsSync.py b/gphotos/GoogleAlbumsSync.py index 297a7eb2..5720a352 100644 --- a/gphotos/GoogleAlbumsSync.py +++ b/gphotos/GoogleAlbumsSync.py @@ -31,8 +31,10 @@ def __init__(self, api: RestClient, root_folder: Path, db: LocalData, db: local database for indexing """ self._root_folder: Path = root_folder - self._links_root = self._root_folder / 'albums' - self._photos_root = self._root_folder / 'photos' + self._photos_folder = Path('photos') + self._albums_folder = Path('albums') + self._links_root = self._root_folder / self._albums_folder + self._photos_root = self._root_folder / self._photos_folder self._db: LocalData = db self._api: RestClient = api self.flush = flush @@ -65,14 +67,30 @@ def fetch_album_contents(self, album_id: str, self._db.put_album_file(album_id, media_item.id) last_date = max(media_item.create_date, last_date) first_date = min(media_item.create_date, first_date) + # this adds other users' photos from shared albums - log.debug('Adding album media item %s %s %s', - media_item.relative_path, media_item.filename, - media_item.duplicate_number) + # Todo - This will cause two copies of a file to appear for + # those shared items you have imported into your own library. + # They will have different RemoteIds, one will point to your + # library copy (you own) and one to the shared item in the + # folder.
Currently with the meta data available it would + # be impossible to eliminate these without eliminating other + # cases where date and filename (TITLE) match if add_media_items: - media_item.set_path_by_date(self._photos_root) + media_item.set_path_by_date(self._photos_folder) + (num, row) = self._db.file_duplicate_no( + str(media_item.filename), + str(media_item.relative_folder), + media_item.id) + # we just learned if there were any duplicates in the db + media_item.duplicate_number = num + + log.debug('Adding album media item %s %s %s', + media_item.relative_path, media_item.filename, + media_item.duplicate_number) self._db.put_row( GooglePhotosRow.from_media(media_item), False) + next_page = items_json.get('nextPageToken') if next_page: body = self.make_search_parameters(album_id=album_id, diff --git a/gphotos/GooglePhotosDownload.py b/gphotos/GooglePhotosDownload.py index e7184f9e..f5d69df3 100644 --- a/gphotos/GooglePhotosDownload.py +++ b/gphotos/GooglePhotosDownload.py @@ -50,9 +50,8 @@ def __init__(self, api: RestClient, root_folder: Path, db: LocalData): # attributes to be set after init # thus in theory one instance could so multiple indexes - # those with _ must be set through their set_ function - self._start_date: datetime = None - self._end_date: datetime = None + self.start_date: datetime = None + self.end_date: datetime = None self.retry_download: bool = False self.video_timeout: int = 2000 self.image_timeout: int = 60 @@ -73,12 +72,6 @@ def __init__(self, api: RestClient, root_folder: Path, db: LocalData): 'https://', HTTPAdapter(max_retries=retries, pool_maxsize=self.MAX_THREADS)) - def set_start_date(self, val: str): - self._start_date = Utils.string_to_date(val) - - def set_end_date(self, val: str): - self._end_date = Utils.string_to_date(val) - def download_photo_media(self): """ here we batch up our requests to get base url for downloading media. 
@@ -101,8 +94,8 @@ def grouper( for media_items_block in grouper( self._db.get_rows_by_search( GooglePhotosRow, - start_date=self._start_date, - end_date=self._end_date, + start_date=self.start_date, + end_date=self.end_date, skip_downloaded=not self.retry_download)): batch = {} diff --git a/gphotos/GooglePhotosIndex.py b/gphotos/GooglePhotosIndex.py index 2f69666a..be8a88f0 100644 --- a/gphotos/GooglePhotosIndex.py +++ b/gphotos/GooglePhotosIndex.py @@ -32,19 +32,12 @@ def __init__(self, api: RestClient, root_folder: Path, db: LocalData): Utils.minimum_date() # attributes to be set after init - # those with _ must be set through their set_ function # thus in theory one instance could so multiple indexes - self._start_date: datetime = None - self._end_date: datetime = None + self.start_date: datetime = None + self.end_date: datetime = None self.include_video: bool = True self.rescan: bool = False - def set_start_date(self, val: str): - self._start_date = Utils.string_to_date(val) - - def set_end_date(self, val: str): - self._end_date = Utils.string_to_date(val) - def check_for_removed_in_folder(self, folder: Path): for pth in folder.iterdir(): if pth.is_dir(): @@ -130,10 +123,10 @@ def index_photos_media(self) -> bool: if self.rescan: start_date = None else: - start_date = self._start_date or self._db.get_scan_date() + start_date = self.start_date or self._db.get_scan_date() items_json = self.search_media(start_date=start_date, - end_date=self._end_date, + end_date=self.end_date, do_video=self.include_video) while items_json: @@ -149,6 +142,7 @@ def index_photos_media(self) -> bool: media_item.id) # we just learned if there were any duplicates in the db media_item.duplicate_number = num + if not row: self.files_indexed += 1 log.info("Indexed %d %s", self.files_indexed, @@ -174,14 +168,14 @@ def index_photos_media(self) -> bool: if next_page: items_json = self.search_media(page_token=next_page, start_date=start_date, - end_date=self._end_date, + end_date=self.end_date, do_video=self.include_video) else: break # scan (in reverse date order) completed so the next incremental scan # can start from the most recent file in this scan - if not self._start_date: + if not self.start_date: self._db.set_scan_date(last_date=self.latest_download) return self.files_indexed > 0 @@ -211,5 +205,5 @@ def get_extra_meta(self): self._db.store() else: log.debug('skipping metadata (already scanned) on %s', - file_path) + file_path) log.warning('updating index with extra metadata complete') diff --git a/gphotos/GooglePhotosRow.py b/gphotos/GooglePhotosRow.py index 383d155a..bfb32121 100644 --- a/gphotos/GooglePhotosRow.py +++ b/gphotos/GooglePhotosRow.py @@ -26,7 +26,7 @@ class GooglePhotosRow(DbRow): 'FileName': str, 'OrigFileName': str, 'DuplicateNo': int, 'FileSize': int, 'MimeType': str, 'Description': str, 'ModifyDate': datetime, 'CreateDate': datetime, - 'SyncDate': datetime, 'Downloaded': int} + 'SyncDate': datetime, 'Downloaded': int, 'Location': str} no_update = ['Id'] def to_media(self) -> DatabaseMedia: @@ -44,7 +44,8 @@ def to_media(self) -> DatabaseMedia: _description=self.Description, _date=self.ModifyDate, _create_date=self.CreateDate, - _downloaded=self.Downloaded) + _downloaded=self.Downloaded, + _location=self.Location) return db_media @classmethod @@ -63,5 +64,6 @@ def from_media(cls, media: GooglePhotosMedia) -> G: ModifyDate=media.modify_date, CreateDate=media.create_date, SyncDate=now_time, - Downloaded=0) + Downloaded=0, + Location='') return new_row diff --git a/gphotos/LocalData.py 
b/gphotos/LocalData.py index 06c6934c..5768ec74 100644 --- a/gphotos/LocalData.py +++ b/gphotos/LocalData.py @@ -25,7 +25,7 @@ class LocalData: DB_FILE_NAME: str = 'gphotos.sqlite' BLOCK_SIZE: int = 10000 - VERSION: float = 5.5 + VERSION: float = 5.6 def __init__(self, root_folder: Path, flush_index: bool = False): """ Initialize a connection to the DB and create some cursors. @@ -122,12 +122,17 @@ def get_scan_date(self) -> datetime: # functions for managing the (any) Media Tables ########################### # noinspection SqlResolve - def put_row(self, row: DbRow, update=False): + def put_row(self, row: DbRow, update=False, album=False): try: if update: - # noinspection PyUnresolvedReferences - query = "UPDATE {0} Set {1} WHERE RemoteId = '{2}'".format( - row.table, row.update, row.RemoteId) + if album: + # noinspection PyUnresolvedReferences + query = "UPDATE {0} Set {1} WHERE RemoteId = '{2}'".format( + row.table, row.update, row.RemoteId) + else: + # noinspection PyUnresolvedReferences + query = "UPDATE {0} Set {1} WHERE RemoteId = '{2}'".format( + row.table, row.update, row.RemoteId) else: # EXISTS - allows for no action when trying to re-insert # noinspection PyUnresolvedReferences @@ -253,6 +258,11 @@ def file_duplicate_no(self, name: str, # the file is new and has no duplicates return 0, None + def put_location(self, sync_file_id: str, location: str): + self.cur.execute( + "UPDATE SyncFiles SET Location=? " + "WHERE RemoteId IS ?;", (location, sync_file_id)) + def put_downloaded(self, sync_file_id: str, downloaded: bool = True): self.cur.execute( "UPDATE SyncFiles SET Downloaded=? " diff --git a/gphotos/LocalFilesMedia.py b/gphotos/LocalFilesMedia.py index ad64151d..bfabaffd 100644 --- a/gphotos/LocalFilesMedia.py +++ b/gphotos/LocalFilesMedia.py @@ -8,8 +8,8 @@ from .BaseMedia import BaseMedia from typing import Dict, List, Union, Any, Optional from datetime import datetime -import piexif -import magic +from mimetypes import guess_type +import exif import re JSONValue = Union[str, int, float, bool, None, Dict[str, Any], List[Any]] @@ -36,11 +36,10 @@ class LocalFilesMedia(BaseMedia): - mime = magic.Magic(mime=True) - def __init__(self, full_path: Path): super(LocalFilesMedia, self).__init__() - self.__mime_type: str = self.mime.from_file(str(full_path)) + (mime, _) = guess_type(str(full_path)) + self.__mime_type: str = mime or 'application/octet-stream' self.is_video: bool = self.__mime_type.startswith('video') self.__full_path: Path = full_path self.__original_name: Path = full_path.name @@ -90,31 +89,29 @@ def get_video_meta(self): self.__full_path.stat().st_mtime) def get_image_date(self): - photo_date = None + p_date = None if self.got_meta: try: - d_bytes = self.__exif.get(piexif.ExifIFD.DateTimeOriginal) - photo_date = Utils.string_to_date(d_bytes.decode("utf-8")) - except (KeyError, ValueError, AttributeError): + # noinspection PyUnresolvedReferences + p_date = Utils.string_to_date(self.__exif.datetime_original) + except AttributeError: try: - d_bytes = self.__exif_0.get(piexif.ImageIFD.DateTime) - photo_date = Utils.string_to_date(d_bytes.decode("utf-8")) - except (KeyError, ValueError, AttributeError): + # noinspection PyUnresolvedReferences + p_date = Utils.string_to_date(self.__exif.datetime) + except AttributeError: pass - - if not photo_date: + if not p_date: # just use file date - photo_date = datetime.utcfromtimestamp( + p_date = datetime.utcfromtimestamp( self.__full_path.stat().st_mtime) - self.__createDate = photo_date + self.__createDate = p_date def 
get_exif(self): try: - exif = piexif.load(str(self.__full_path)) + with open(str(self.__full_path), 'rb') as image_file: + self.__exif = exif.Image(image_file) self.got_meta = True - self.__exif_0: dict = exif["0th"] - self.__exif: dict = exif["Exif"] - except piexif.InvalidImageDataError: + except IOError: self.got_meta = False @property @@ -124,8 +121,10 @@ def uid(self) -> str: if self.got_meta: elif self.is_video: uid = 'not_supported' else: - uid = self.__exif.get(piexif.ExifIFD.ImageUniqueID) - if not uid: + try: + # noinspection PyUnresolvedReferences + uid = self.__exif.image_unique_id + except AttributeError: uid = 'no_uid_in_exif' return uid @@ -145,7 +144,11 @@ def id(self) -> Optional[str]: @property def description(self) -> str: - d = self.__exif_0.get(piexif.ImageIFD.ImageDescription) + try: + # noinspection PyUnresolvedReferences + d = self.__exif.image_description + except AttributeError: + d = None if d: result = d.decode("utf-8") if result in HUAWEI_JUNK: @@ -176,4 +179,10 @@ def url(self) -> Optional[str]: @property def camera_model(self): - return self.__exif_0.get(piexif.ImageIFD.CameraSerialNumber) + try: + # noinspection PyUnresolvedReferences + cam = '{} {}'.format( + self.__exif.make, self.__exif.model) + except AttributeError: + cam = None + return cam diff --git a/gphotos/LocalFilesScan.py b/gphotos/LocalFilesScan.py index fe9eead3..bdbd90d2 100644 --- a/gphotos/LocalFilesScan.py +++ b/gphotos/LocalFilesScan.py @@ -5,7 +5,6 @@ import shutil from typing import Callable from .LocalData import LocalData -import piexif import logging from .LocalFilesMedia import LocalFilesMedia from .LocalFilesRow import LocalFilesRow @@ -28,12 +27,18 @@ def __init__(self, root_folder: Path, scan_folder: Path, db: LocalData): """ self._scan_folder: Path = scan_folder self._root_folder: Path = root_folder + self._comparison_folder = self._root_folder / 'comparison' self._ignore_files: str = str(root_folder / '*gphotos*') - self._ignore_folders = [root_folder/path for path in IGNORE_FOLDERS] + self._ignore_folders = [root_folder / path for path in IGNORE_FOLDERS] self._db: LocalData = db self.count = 0 def scan_local_files(self): + # for self-comparison, make sure there is no comparison folder + # or we'll get recursive entries + if self._comparison_folder.exists(): + log.debug('removing previous comparison tree') + shutil.rmtree(self._comparison_folder) log.warning('Indexing comparison folder %s', self._scan_folder) self.scan_folder(self._scan_folder, self.index_local_item) log.warning("Indexed %d files in comparison folder %s", @@ -71,39 +76,14 @@ def index_local_item(self, path: Path) -> int: raise return result - @classmethod - def dump_exif(cls, path: Path): - count = 0 - # use this for analysis if struggling to find relevant EXIF tags - try: - exif_dict = piexif.load(str(path)) - uid = exif_dict['Exif'].get(piexif.ExifIFD.ImageUniqueID) - if uid and uid != '': - log.warning( - '%s = %s', path, - exif_dict['Exif'].get(piexif.ExifIFD.ImageUniqueID)) - else: - count += 1 - log.warning('No ID on %d %s', count, path) - - # for ifd in ("0th", "Exif", "GPS", "1st"): - # print('--------', ifd) - # for tag in exif_dict[ifd]: - # print(piexif.TAGS[ifd][tag], tag, - # exif_dict[ifd][tag]) - except piexif.InvalidImageDataError: - pass - log.debug("NO EXIF.
%s", path) - def find_missing_gphotos(self): log.warning('matching local files and photos library ...') self._db.find_local_matches() log.warning('creating comparison folder ...') - comparison_folder = self._root_folder / 'comparison' - folders_missing = comparison_folder / 'missing_files' - if comparison_folder.exists(): + folders_missing = self._comparison_folder / 'missing_files' + if self._comparison_folder.exists(): log.debug('removing previous comparison tree') - shutil.rmtree(comparison_folder) + shutil.rmtree(self._comparison_folder) for i, orig_path in enumerate(self._db.get_missing_paths()): link_path = folders_missing / \ @@ -114,7 +94,7 @@ def find_missing_gphotos(self): if not link_path.exists(): link_path.symlink_to(orig_path) - folders_extras = comparison_folder / 'extra_files' + folders_extras = self._comparison_folder / 'extra_files' for i, orig_path in enumerate(self._db.get_extra_paths()): link_path = folders_extras / orig_path log.debug('adding extra file %d link %s', i, link_path) @@ -123,7 +103,7 @@ def find_missing_gphotos(self): if not link_path.exists(): link_path.symlink_to(self._root_folder / orig_path) - flat_duplicates = comparison_folder / 'duplicates' + flat_duplicates = self._comparison_folder / 'duplicates' flat_duplicates.mkdir(parents=True) duplicate_group = 0 prev_id = '' diff --git a/gphotos/LocationExtract.py b/gphotos/LocationExtract.py new file mode 100644 index 00000000..c8095eb0 --- /dev/null +++ b/gphotos/LocationExtract.py @@ -0,0 +1,124 @@ +from time import sleep +import pickle + +from appdirs import AppDirs +from pathlib import Path +from getpass import getpass +from selenium import webdriver +from selenium.webdriver import ChromeOptions +from selenium.common.exceptions import WebDriverException +from urllib.parse import urlparse, parse_qs + +import logging + +log = logging.getLogger(__name__) + +CHROME_DRIVER_PATH = 'chromedriver' +XPATH_MAP_URL = '//div[starts-with(@data-mapurl,"https:")]' +XPATH_FILENAME = '//div[starts-with(@aria-label,"Filename")]' +XPATH_INFO_BUTTON = '//button[@title="Info"]' + + +class LocationExtract: + def __init__(self, with_gui: bool = False): + self.user: str = None + self.pwd: str = None + self.with_gui = with_gui + self.driver: webdriver.Chrome = None + app_dirs = AppDirs('gphotos-sync') + self.cookie_file: Path = Path( + app_dirs.user_cache_dir) / ".gphotos_cookies" + if not self.cookie_file.parent.is_dir(): + self.cookie_file.parent.mkdir(parents=True) + + def get_credentials(self, user: str = None, pwd: str = None): + self.user = user or input('Google Photos User Name: ') + self.pwd = pwd or getpass() + + def authenticate(self, url: str): + options = ChromeOptions() + if not self.with_gui: + options.headless = True + self.driver = webdriver.Chrome(CHROME_DRIVER_PATH, + chrome_options=options) + self.driver.implicitly_wait(2) + self.driver.get("https://www.google.com") + + if self.cookie_file.exists(): + cookies = pickle.load(open(str(self.cookie_file), 'rb')) + for cookie in cookies: + self.driver.add_cookie(cookie) + + self.driver.get(url) + if str(self.driver.current_url).startswith( + 'https://accounts.google.com'): + # we have been re-directed to Google Authentication + if not self.with_gui: + if self.user is None: + self.get_credentials() + identifier = self.driver.find_element_by_id('identifierId') + identifier.send_keys(self.user) + id_next = self.driver.find_element_by_id('identifierNext') + id_next.click() + pwd = self.driver.find_element_by_name('password') + pwd.send_keys(self.pwd) + sleep(.1) + 
pwd_next = self.driver.find_element_by_id('passwordNext') + pwd_next.click() + + # wait for authentication (including two step) to be completed + while self.driver.current_url != url: + sleep(1) + pickle.dump(self.driver.get_cookies(), + open(str(self.cookie_file), "wb")) + + def extract_location(self, url: str): + location = None + filename = None + if self.driver is None: + self.authenticate(url) + else: + self.driver.get(url) + + try: + info_button = self.driver.find_element_by_xpath(XPATH_INFO_BUTTON) + file = self.driver.find_elements_by_xpath(XPATH_FILENAME) + if len(file) == 0: + info_button.click() + file = self.driver.find_element_by_xpath(XPATH_FILENAME) + filename = file.text + else: + filename = file[0].text + map_urls = self.driver.find_elements_by_xpath(XPATH_MAP_URL) + + if len(map_urls) == 0: + log.warning('no location for %s', filename) + else: + location = map_urls[0].get_attribute("data-mapurl") + except WebDriverException: + log.warning('cannot fetch GPS info for %s', filename) + + if location: + parsed = urlparse(location) + params = parse_qs(parsed.query) + location = params.get('center') + if location: + location = location[0] + log.info('%s GPS location is %s', filename, location) + return location + + @staticmethod + def to_deg(value, loc): + if value < 0: + loc_value = loc[0] + elif value > 0: + loc_value = loc[1] + else: + loc_value = "" + abs_value = abs(value) + deg = int(abs_value) + t1 = (abs_value-deg)*60 + minutes = int(t1) + sec = round((t1 - minutes) * 60, 5) + return deg, minutes, sec, loc_value + diff --git a/gphotos/LocationUpdate.py b/gphotos/LocationUpdate.py new file mode 100644 index 00000000..0a1f8623 --- /dev/null +++ b/gphotos/LocationUpdate.py @@ -0,0 +1,53 @@ +from pathlib import Path +from datetime import datetime + +from gphotos import Utils +from gphotos.GooglePhotosRow import GooglePhotosRow +from gphotos.LocalData import LocalData +from gphotos.LocationExtract import LocationExtract + +import logging + +log = logging.getLogger(__name__) + + +class LocationUpdate: + def __init__(self, root_folder: Path, db: LocalData): + self._root_folder: Path = root_folder + self._db: LocalData = db + self._media_folder: Path = Path('photos') + self.files_indexed: int = 0 + self.files_index_skipped: int = 0 + self.start_date: datetime = None + self.end_date: datetime = None + if db: + self.latest_download = self._db.get_scan_date() or \ + Utils.minimum_date() + self.extractor = LocationExtract() + + def index_locations(self): + count = 0 + log.warning('indexing image locations via Google Photos Web ...') + media_items = self._db.get_rows_by_search( + GooglePhotosRow, + start_date=self.start_date, + end_date=self.end_date) + for item in media_items: + file_path = self._root_folder / item.relative_path + log.debug('extracting location %d for %s', count, file_path) + location = self.extractor.extract_location(item.url) + if location is None: + self._db.put_location(item.id, 'none') + else: + log.info('location %d for %s is %s', + count, file_path, location) + count += 1 + self._db.put_location(item.id, location) + if count % 2000 == 0: + self._db.store() + log.warning('indexing image locations complete') + + def set_locations(self): + # this will insert location into the local files EXIF + pass + diff --git a/gphotos/Main.py b/gphotos/Main.py index 8c081877..421f48ec 100644 --- a/gphotos/Main.py +++ b/gphotos/Main.py @@ -15,6 +15,8 @@ from gphotos.authorize import Authorize from gphotos.restclient import RestClient from gphotos.LocalFilesScan import 
LocalFilesScan +from gphotos.LocationUpdate import LocationUpdate +from gphotos import Utils import pkg_resources __version__ = pkg_resources.require("gphotos-sync")[0].version @@ -34,6 +36,9 @@ def __init__(self): self.google_photos_down: GooglePhotosDownload = None self.google_albums_sync: GoogleAlbumsSync = None self.local_files_scan: LocalFilesScan = None + self.location_update: LocationUpdate = None + self._start_date = None + self._end_date = None self.auth: Authorize = None @@ -118,8 +123,13 @@ def __init__(self): "--skip-albums", action='store_true', help="Dont download albums (for testing)") + parser.add_argument( + "--get-locations", + action='store_true', + help="Scrape the Google Photos website for location metadata" + " and add it to the local files' EXIF metadata") - def setup(self, args: Namespace, db_path: Path): + def setup(self, args: Namespace, db_path: Path): root_folder = Path(args.root_folder).absolute() compare_folder = None if args.compare_folder: @@ -152,14 +162,21 @@ def setup(self, args: Namespace, db_path: Path): self.google_albums_sync = GoogleAlbumsSync( self.google_photos_client, root_folder, self.data_store, args.flush_index or args.retry_download or args.rescan) + self.location_update = LocationUpdate(root_folder, self.data_store) if args.compare_folder: self.local_files_scan = LocalFilesScan( root_folder, compare_folder, self.data_store) - self.google_photos_idx.set_start_date(args.start_date) - self.google_photos_idx.set_end_date(args.end_date) - self.google_photos_down.set_start_date(args.start_date) - self.google_photos_down.set_end_date(args.end_date) + self._start_date = Utils.string_to_date(args.start_date) + self._end_date = Utils.string_to_date(args.end_date) + + self.google_photos_idx.start_date = self._start_date + self.google_photos_idx.end_date = self._end_date + self.google_photos_down.start_date = self._start_date + self.google_photos_down.end_date = self._end_date + self.location_update.start_date = self._start_date + self.location_update.end_date = self._end_date + self.google_photos_idx.include_video = not args.skip_video self.google_photos_idx.rescan = args.rescan self.google_photos_down.retry_download = args.retry_download @@ -201,7 +218,14 @@ def logging(cls, args: Namespace, folder: Path): # add the handler to the root logger logging.getLogger('').addHandler(console) - def start(self, args: Namespace): + def do_location(self, args: Namespace): + with self.data_store: + if not args.skip_index: + self.location_update.index_locations() + if not args.index_only: + self.location_update.set_locations() + + def do_sync(self, args: Namespace): new_files = True with self.data_store: if not args.skip_index: @@ -222,10 +246,17 @@ def start(self, args: Namespace): self.google_photos_idx.check_for_removed() if args.compare_folder: - self.local_files_scan.scan_local_files() - self.google_photos_idx.get_extra_meta() + if not args.skip_index: + self.local_files_scan.scan_local_files() + self.google_photos_idx.get_extra_meta() self.local_files_scan.find_missing_gphotos() + def start(self, args: Namespace): + if args.get_locations: + self.do_location(args) + else: + self.do_sync(args) + def main(self, test_args: dict = None): start_time = datetime.now() args = self.parser.parse_args(test_args) diff --git a/gphotos/Queries.py b/gphotos/Queries.py index 6803e8ce..d2ebfc2c 100644 --- a/gphotos/Queries.py +++ b/gphotos/Queries.py @@ -1,21 +1,25 @@ # coding: utf8 +# noinspection SqlWithoutWhere match = \ [""" +-- stage 0 - remove previous matches +UPDATE 
LocalFiles +set RemoteId = NULL ; +""", + """ -- stage 1 - look for unique matches - UPDATE LocalFiles +UPDATE LocalFiles set RemoteId = (SELECT RemoteId FROM SyncFiles - WHERE (LocalFiles.OriginalFileName == SyncFiles.OrigFileName or - LocalFiles.FileName == SyncFiles.FileName) + WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName AND (LocalFiles.Uid == SyncFiles.Uid AND LocalFiles.CreateDate = SyncFiles.CreateDate) -- 32 character ids are legitimate and unique OR (LocalFiles.Uid == SyncFiles.Uid AND length(LocalFiles.Uid) == 32) ) -WHERE LocalFiles.Uid notnull and LocalFiles.Uid != 'not_supported' and -LocalFiles.RemoteId ISNULL +WHERE LocalFiles.Uid notnull and LocalFiles.Uid != 'not_supported' ; """, """ @@ -26,8 +30,7 @@ UPDATE LocalFiles set RemoteId = (SELECT RemoteId FROM SyncFiles - WHERE (LocalFiles.OriginalFileName == SyncFiles.OrigFileName or - LocalFiles.FileName == SyncFiles.FileName) + WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName AND LocalFiles.CreateDate = SyncFiles.CreateDate AND SyncFiles.RemoteId NOT IN (select RemoteId from pre_match) ) @@ -35,15 +38,13 @@ ; """, """ --- stage 3 FINAL - mop up on filename and file size +-- stage 3 FINAL - mop up on filename only with pre_match(RemoteId) as (SELECT RemoteId from LocalFiles where RemoteId notnull) UPDATE LocalFiles set RemoteId = (SELECT RemoteId FROM SyncFiles - WHERE (LocalFiles.OriginalFileName == SyncFiles.OrigFileName or - LocalFiles.FileName == SyncFiles.FileName) - AND SyncFiles.FileSize == LocalFiles.FileSize + WHERE LocalFiles.OriginalFileName == SyncFiles.OrigFileName AND SyncFiles.RemoteId NOT IN (select RemoteId from pre_match) ) WHERE LocalFiles.RemoteId isnull @@ -53,13 +54,8 @@ missing_files = """select * from LocalFiles where RemoteId isnull;""" extra_files = """ -select * -from Syncfiles -where RemoteId - in (SELECT S.RemoteId - FROM SyncFiles S - LEFT JOIN LocalFiles L ON S.RemoteId = L.RemoteId - WHERE L.RemoteId ISNULL) +select * from SyncFiles where RemoteId not in (select RemoteId from LocalFiles) +and uid not in (select uid from LocalFiles where length(SyncFiles.Uid) = 32) ; """ diff --git a/gphotos/sql/gphotos_create.sql b/gphotos/sql/gphotos_create.sql index b3923fc5..fdcfd0d7 100644 --- a/gphotos/sql/gphotos_create.sql +++ b/gphotos/sql/gphotos_create.sql @@ -78,7 +78,8 @@ create table SyncFiles ModifyDate INT, CreateDate INT, SyncDate INT, - Downloaded INT DEFAULT 0 + Downloaded INT DEFAULT 0, + Location Text ); DROP INDEX IF EXISTS RemoteIdIdx; diff --git a/setup.py b/setup.py index 1c9a8489..d78d34a2 100755 --- a/setup.py +++ b/setup.py @@ -5,12 +5,12 @@ module_name = "gphotos-sync" install_reqs = [ - 'python-magic', - 'piexif', + 'exif', 'urllib3', 'appdirs', 'requests', 'requests_oauthlib', + 'selenium', 'PyYaml', ] @@ -19,8 +19,8 @@ setup( name=module_name, - version='2.8.4', - python_requires='>=3.6', + version='2.8.5', + python_requires='>=3.7', license='MIT', platforms=['Linux', 'Windows', 'Mac'], description='Google Photos backup tool', diff --git a/test/test_credentials/.gphotos.token b/test/test_credentials/.gphotos.token index eb14c423..f3d86282 100644 --- a/test/test_credentials/.gphotos.token +++ b/test/test_credentials/.gphotos.token @@ -1 +1 @@ -{"access_token": "ya29.GlzEBqv-UThDHP8JQK-EQmbXLhnsztSoy4tIOABr_rtVCnohOcGuDa0gY9eD6D_ZjqBr4MCDvEVyXiQeDalxqSB0Xgs5ooMMxU4GUkS-XXIcvpW2iL6FrHOZGFTX7Q", "expires_in": 3600, "scope": ["https://www.googleapis.com/auth/photoslibrary.readonly", "https://www.googleapis.com/auth/photoslibrary.sharing"], "token_type": 
"Bearer", "expires_at": 1551915684.420522, "refresh_token": "1/HG0feqbbu7FZLjztEbGneV0Jz2aNoiNYuFIHvcZ9MgQ"} \ No newline at end of file +{"access_token": "ya29.GlzKBi_JK3USsbLZfNXjAMkcFa3IPGFEviVdPhvBD6jmVJDwvq_b0L2V95LNzxN9Fx-nYI8X52_JKHEFh8HgqgDLDfSci0c1cky6_7f2RNkUY74ZzdPUFRU2ANsumg", "expires_in": 3600, "scope": ["https://www.googleapis.com/auth/photoslibrary.readonly", "https://www.googleapis.com/auth/photoslibrary.sharing"], "token_type": "Bearer", "expires_at": 1552425508.557461, "refresh_token": "1/HG0feqbbu7FZLjztEbGneV0Jz2aNoiNYuFIHvcZ9MgQ"} \ No newline at end of file diff --git a/test/test_units.py b/test/test_units.py index 13e2eb17..7292cd4b 100644 --- a/test/test_units.py +++ b/test/test_units.py @@ -1,7 +1,6 @@ from _datetime import datetime from pathlib import Path from requests import exceptions as exc -import piexif from unittest import TestCase from gphotos.GoogleAlbumMedia import GoogleAlbumMedia import json @@ -54,44 +53,26 @@ def test_download_timeout(self): # .2 timeout by 5 retries = 1 sec self.assertGreater(elapsed.seconds, 1) - @classmethod - def dump_exif(cls, p: Path): - # use this for analysis if struggling to find relevant EXIF tags - try: - exif_dict = piexif.load(str(p)) - for ifd in ("0th", "Exif", "GPS", "1st"): - print('--------', ifd) - for tag in exif_dict[ifd]: - print(piexif.TAGS[ifd][tag], tag, - exif_dict[ifd][tag]) - except piexif.InvalidImageDataError: - print("no EXIF") - def test_jpg_description(self): p = test_data / 'IMG_20190102_112832.jpg' lfm = LocalFilesMedia(p) - self.dump_exif(p) self.assertEqual(lfm.description, '') p = test_data / '20180126_185832.jpg' lfm = LocalFilesMedia(p) - self.dump_exif(p) self.assertEqual(lfm.description, '') p = test_data / '1987-JohnWoodAndGiles.jpg' lfm = LocalFilesMedia(p) - self.dump_exif(p) self.assertEqual(lfm.description, '') def test_jpg_description2(self): p = test_data / 'IMG_20180908_132733-gphotos.jpg' lfm = LocalFilesMedia(p) - self.dump_exif(p) self.assertEqual(lfm.description, '') p = test_data / 'IMG_20180908_132733-insync.jpg' lfm = LocalFilesMedia(p) - self.dump_exif(p) self.assertEqual(lfm.description, '') def test_empty_media(self):