Skip to content

Update for 5.2 and lxml instead of Grab #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ A Python 3 script to __automate the download of SQL backups via a
This is useful when your web hosting provider does not grant you access to a console (for `mysqldump`) but
you want to automate the backup of your database (without having to manually use the browser).

It has been tested with Python 3.4+ on Linux and Windows and the following versions of phpMyAdmin:
`4.3.x - 4.8.x, 5.0.0`
It has been tested with Python 3.8 on Linux and the following version of phpMyAdmin:
`5.2`

_Note_: The web interface of phpMyAdmin may change in the future and break this script. Please file a bug report
(including your version of phpMyAdmin) if you encounter this issue.
Expand Down Expand Up @@ -83,9 +83,8 @@ UTC date / time to the directory `/tmp`, e.g. `/tmp/2016-03-11--15-19-04-UTC_exa

## Requirements

- A [Python 3.4+](https://www.python.org/) installation on your system
- [Grab - python web-scraping framework](http://grablib.org/): Install via `pip install -U Grab` or see
the [installation instructions](http://docs.grablib.org/en/latest/usage/installation.html) if you run into problems.
- A [Python 3.8+](https://www.python.org/) installation on your system
- The Python packages listed in `requirements.txt`: install them via `pip install -r requirements.txt`

__Note for Windows users__: while it is possible to install the requirements natively, it is often easier to use the
[Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10) if you are using Windows 10
Expand Down
130 changes: 86 additions & 44 deletions phpmyadmin_sql_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,64 +28,103 @@
import os
import re
import sys
from itertools import product
from urllib.parse import urljoin

import grab
import requests
from lxml import html

__version__ = '2019-05-07.1'
__version__ = '2024-12-01'

CONTENT_DISPOSITION_FILENAME_RE = re.compile(r'^.*filename="(?P<filename>[^"]+)".*$')
DEFAULT_PREFIX_FORMAT = r'%Y-%m-%d--%H-%M-%S-UTC_'


def is_login_successful(g):
return g.doc.text_search("frame_content") or g.doc.text_search("server_export.php")
def is_login_successful(tree):
hrefs = tree.xpath("//a/@href")
target_substrings = ["frame_content", "server_export.php", "index.php?route=/server/export"]
combinations = product(target_substrings, hrefs)


def open_frame_if_phpmyadmin_3(g):
frame_url_selector = g.doc.select("id('frame_content')/@src")
if frame_url_selector.exists():
g.go(frame_url_selector.text())
return any(substring in href for substring, href in combinations)


def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=False, prepend_date=True, basename=None,
output_directory=os.getcwd(), exclude_dbs=None, compression='none', prefix_format=None,
timeout=60, http_auth=None, server_name=None, **kwargs):
prefix_format = prefix_format or DEFAULT_PREFIX_FORMAT
exclude_dbs = exclude_dbs.split(',') or []
encoding = '' if compression == 'gzip' else 'gzip'

g = grab.Grab(encoding=encoding, timeout=timeout)
if http_auth:
g.setup(userpwd=http_auth)
g.go(url)

g.doc.set_input_by_id('input_username', user)
g.doc.set_input_by_id('input_password', password)
if server_name:
g.doc.set_input_by_id('input_servername', server_name)
g.submit()

if not is_login_successful(g):
raise ValueError('Could not login - did you provide the correct username / password?')

open_frame_if_phpmyadmin_3(g)

export_url = g.doc.select("id('topmenu')//a[contains(@href,'server_export.php')]/@href").text()
g.go(export_url)

dbs_available = [option.attrib['value'] for option in g.doc.form.inputs['db_select[]']]
exclude_dbs = exclude_dbs.split(',') if exclude_dbs else []
session = requests.Session()

# Login
response = session.get(url, timeout=timeout)
if response.status_code != 200:
raise ValueError("Failed to load the login page.")

tree = html.fromstring(response.content)
form_action = tree.xpath("//form[@id='login_form']/@action")
form_action = form_action[0] if form_action else url

form_data = {
"pma_username": user,
"pma_password": password,
}

hidden_inputs = tree.xpath("//form[@id='login_form']//input[@type='hidden']")
for hidden_input in hidden_inputs:
name = hidden_input.get("name")
value = hidden_input.get("value", "")
if name:
form_data[name] = value

login_response = session.post(urljoin(url,form_action), data=form_data, timeout=timeout)

if login_response.status_code != 200:
raise ValueError("Could not log in. Please check your credentials.")

tree = html.fromstring(login_response.content)
if not is_login_successful(tree):
raise ValueError("Could not log in. Please check your credentials.")

# Extract export URL
export_url = tree.xpath("id('topmenu')//a[contains(@href,'server_export.php')]/@href")
if not export_url:
export_url = tree.xpath("id('topmenu')//a[contains(@href,'index.php?route=/server/export')]/@href")
if not export_url:
raise ValueError("Could not find export URL.")
export_url = export_url[0]

# Access export page
export_response = session.get(urljoin(url,export_url), timeout=timeout)
export_tree = html.fromstring(export_response.content)


# Determine databases to dump
dbs_available = export_tree.xpath("//select[@name='db_select[]']/option/@value")
dbs_to_dump = [db_name for db_name in dbs_available if db_name not in exclude_dbs]
if not dbs_to_dump:
print('Warning: no databases to dump (databases available: "{}")'.format('", "'.join(dbs_available)),
file=sys.stderr)

file_response = g.submit(
extra_post=[('db_select[]', db_name) for db_name in dbs_to_dump] + [('compression', compression)])

re_match = CONTENT_DISPOSITION_FILENAME_RE.match(g.doc.headers['Content-Disposition'])
print(f'Warning: no databases to dump (databases available: "{", ".join(dbs_available)}")',
file=sys.stderr)

# Prepare form data
dump_form_action = export_tree.xpath("//form[@name='dump']/@action")[0]
form_data = {'db_select[]': dbs_to_dump}
form_data['compression'] = compression
form_data['what'] = 'sql'
form_data['filename_template'] = '@SERVER@'
form_data['sql_structure_or_data'] = 'structure_and_data'
dump_hidden_inputs = export_tree.xpath("//form[@name='dump']//input[@type='hidden']")
for hidden_input in dump_hidden_inputs:
name = hidden_input.get("name")
value = hidden_input.get("value", "")
if name:
form_data[name] = value

# Submit form and download file
file_response = session.post(urljoin(url, dump_form_action), data=form_data, timeout=timeout, stream=True)
content_disposition = file_response.headers.get('Content-Disposition', '')
re_match = CONTENT_DISPOSITION_FILENAME_RE.match(content_disposition)
if not re_match:
raise ValueError(
'Could not determine SQL backup filename from {}'.format(g.doc.headers['Content-Disposition']))
raise ValueError(f"Could not determine SQL backup filename from {content_disposition}")

content_filename = re_match.group('filename')
filename = content_filename if basename is None else basename + os.path.splitext(content_filename)[1]
Expand All @@ -97,16 +136,19 @@ def download_sql_backup(url, user, password, dry_run=False, overwrite_existing=F
if os.path.isfile(out_filename) and not overwrite_existing:
basename, ext = os.path.splitext(out_filename)
n = 1
print('File {} already exists, to overwrite it use --overwrite-existing'.format(out_filename), file=sys.stderr)
print(f'File {out_filename} already exists, to overwrite it use --overwrite-existing', file=sys.stderr)
while True:
alternate_out_filename = '{}_({}){}'.format(basename, n, ext)
alternate_out_filename = f'{basename}_({n}){ext}'
if not os.path.isfile(alternate_out_filename):
out_filename = alternate_out_filename
break
n += 1

# Save file if not dry run
if not dry_run:
file_response.save(out_filename)
with open(out_filename, 'wb') as f:
for chunk in file_response.iter_content(chunk_size=8192):
f.write(chunk)

return out_filename

Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
certifi==2024.8.30
charset-normalizer==3.4.0
idna==3.10
lxml==5.3.0
requests==2.32.3
urllib3==2.2.3